🏄‍♂️ refactor: Optimize Reasoning UI & Token Streaming (#5546)

*  feat: Implement Show Thinking feature; refactor: testing thinking render optimizations

*  feat: Refactor Thinking component styles and enhance Markdown rendering

* chore: add back removed code, revert type changes

* chore: Add back resetCounter effect to Markdown component for improved code block indexing

* chore: bump @librechat/agents and google langchain packages

* WIP: reasoning type updates

* WIP: first pass, reasoning content blocks

* chore: revert code

* chore: bump @librechat/agents

* refactor: optimize reasoning tag handling

* style: ul indent padding

* feat: add Reasoning component to handle reasoning display

* feat: first pass, content reasoning part styling

* refactor: add content placeholder for endpoints using new stream handler

* refactor: only cache messages when requesting stream audio

* fix: circular dependency

* fix: add default parameter

* refactor: tts, only request after message stream, fix chrome autoplay

* style: update label for submitting state and add localization for 'Thinking...'

* fix: improve global audio pause logic and reset active run ID

* fix: handle artifact edge cases

* fix: remove unnecessary console log from artifact update test

* feat: add support for continued message handling with new streaming method

---------

Co-authored-by: Marco Beretta <81851188+berry-13@users.noreply.github.com>
This commit is contained in:
Danny Avila 2025-01-29 19:46:58 -05:00 committed by GitHub
parent d60a149ad9
commit 591a019766
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
48 changed files with 1791 additions and 726 deletions

View file

@ -7,15 +7,12 @@ const {
EModelEndpoint,
ErrorTypes,
Constants,
CacheKeys,
Time,
} = require('librechat-data-provider');
const { getMessages, saveMessage, updateMessage, saveConvo } = require('~/models');
const { addSpaceIfNeeded, isEnabled } = require('~/server/utils');
const { truncateToolCallOutputs } = require('./prompts');
const checkBalance = require('~/models/checkBalance');
const { getFiles } = require('~/models/File');
const { getLogStores } = require('~/cache');
const TextStream = require('./TextStream');
const { logger } = require('~/config');
@ -54,6 +51,12 @@ class BaseClient {
this.outputTokensKey = 'completion_tokens';
/** @type {Set<string>} */
this.savedMessageIds = new Set();
/**
* Flag to determine if the client re-submitted the latest assistant message.
* @type {boolean | undefined} */
this.continued;
/** @type {TMessage[]} */
this.currentMessages = [];
}
setOptions() {
@ -589,6 +592,7 @@ class BaseClient {
} else {
latestMessage.text = generation;
}
this.continued = true;
} else {
this.currentMessages.push(userMessage);
}
@ -720,17 +724,6 @@ class BaseClient {
this.responsePromise = this.saveMessageToDatabase(responseMessage, saveOptions, user);
this.savedMessageIds.add(responseMessage.messageId);
if (responseMessage.text) {
const messageCache = getLogStores(CacheKeys.MESSAGES);
messageCache.set(
responseMessageId,
{
text: responseMessage.text,
complete: true,
},
Time.FIVE_MINUTES,
);
}
delete responseMessage.tokenCount;
return responseMessage;
}

View file

@ -1,6 +1,7 @@
const OpenAI = require('openai');
const { OllamaClient } = require('./OllamaClient');
const { HttpsProxyAgent } = require('https-proxy-agent');
const { SplitStreamHandler, GraphEvents } = require('@librechat/agents');
const {
Constants,
ImageDetail,
@ -28,17 +29,17 @@ const {
createContextHandlers,
} = require('./prompts');
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
const { addSpaceIfNeeded, isEnabled, sleep } = require('~/server/utils');
const Tokenizer = require('~/server/services/Tokenizer');
const { spendTokens } = require('~/models/spendTokens');
const { isEnabled, sleep } = require('~/server/utils');
const { handleOpenAIErrors } = require('./tools/util');
const { createLLM, RunManager } = require('./llm');
const { logger, sendEvent } = require('~/config');
const ChatGPTClient = require('./ChatGPTClient');
const { summaryBuffer } = require('./memory');
const { runTitleChain } = require('./chains');
const { tokenSplit } = require('./document');
const BaseClient = require('./BaseClient');
const { logger } = require('~/config');
class OpenAIClient extends BaseClient {
constructor(apiKey, options = {}) {
@ -65,6 +66,8 @@ class OpenAIClient extends BaseClient {
this.usage;
/** @type {boolean|undefined} */
this.isO1Model;
/** @type {SplitStreamHandler | undefined} */
this.streamHandler;
}
// TODO: PluginsClient calls this 3x, unneeded
@ -1064,11 +1067,36 @@ ${convo}
});
}
getStreamText() {
if (!this.streamHandler) {
return '';
}
const reasoningTokens =
this.streamHandler.reasoningTokens.length > 0
? `:::thinking\n${this.streamHandler.reasoningTokens.join('')}\n:::\n`
: '';
return `${reasoningTokens}${this.streamHandler.tokens.join('')}`;
}
getMessageMapMethod() {
/**
* @param {TMessage} msg
*/
return (msg) => {
if (msg.text != null && msg.text && msg.text.startsWith(':::thinking')) {
msg.text = msg.text.replace(/:::thinking.*?:::/gs, '').trim();
}
return msg;
};
}
async chatCompletion({ payload, onProgress, abortController = null }) {
let error = null;
let intermediateReply = [];
const errorCallback = (err) => (error = err);
const intermediateReply = [];
const reasoningTokens = [];
try {
if (!abortController) {
abortController = new AbortController();
@ -1266,6 +1294,19 @@ ${convo}
reasoningKey = 'reasoning';
}
this.streamHandler = new SplitStreamHandler({
reasoningKey,
accumulate: true,
runId: this.responseMessageId,
handlers: {
[GraphEvents.ON_RUN_STEP]: (event) => sendEvent(this.options.res, event),
[GraphEvents.ON_MESSAGE_DELTA]: (event) => sendEvent(this.options.res, event),
[GraphEvents.ON_REASONING_DELTA]: (event) => sendEvent(this.options.res, event),
},
});
intermediateReply = this.streamHandler.tokens;
if (modelOptions.stream) {
streamPromise = new Promise((resolve) => {
streamResolve = resolve;
@ -1292,41 +1333,36 @@ ${convo}
}
if (typeof finalMessage.content !== 'string' || finalMessage.content.trim() === '') {
finalChatCompletion.choices[0].message.content = intermediateReply.join('');
finalChatCompletion.choices[0].message.content = this.streamHandler.tokens.join('');
}
})
.on('finalMessage', (message) => {
if (message?.role !== 'assistant') {
stream.messages.push({ role: 'assistant', content: intermediateReply.join('') });
stream.messages.push({
role: 'assistant',
content: this.streamHandler.tokens.join(''),
});
UnexpectedRoleError = true;
}
});
let reasoningCompleted = false;
if (this.continued === true) {
const latestText = addSpaceIfNeeded(
this.currentMessages[this.currentMessages.length - 1]?.text ?? '',
);
this.streamHandler.handle({
choices: [
{
delta: {
content: latestText,
},
},
],
});
}
for await (const chunk of stream) {
if (chunk?.choices?.[0]?.delta?.[reasoningKey]) {
if (reasoningTokens.length === 0) {
const thinkingDirective = '<think>\n';
intermediateReply.push(thinkingDirective);
reasoningTokens.push(thinkingDirective);
onProgress(thinkingDirective);
}
const reasoning_content = chunk?.choices?.[0]?.delta?.[reasoningKey] || '';
intermediateReply.push(reasoning_content);
reasoningTokens.push(reasoning_content);
onProgress(reasoning_content);
}
const token = chunk?.choices?.[0]?.delta?.content || '';
if (!reasoningCompleted && reasoningTokens.length > 0 && token) {
reasoningCompleted = true;
const separatorTokens = '\n</think>\n';
reasoningTokens.push(separatorTokens);
onProgress(separatorTokens);
}
intermediateReply.push(token);
onProgress(token);
this.streamHandler.handle(chunk);
if (abortController.signal.aborted) {
stream.controller.abort();
break;
@ -1369,7 +1405,7 @@ ${convo}
if (!Array.isArray(choices) || choices.length === 0) {
logger.warn('[OpenAIClient] Chat completion response has no choices');
return intermediateReply.join('');
return this.streamHandler.tokens.join('');
}
const { message, finish_reason } = choices[0] ?? {};
@ -1379,11 +1415,11 @@ ${convo}
if (!message) {
logger.warn('[OpenAIClient] Message is undefined in chatCompletion response');
return intermediateReply.join('');
return this.streamHandler.tokens.join('');
}
if (typeof message.content !== 'string' || message.content.trim() === '') {
const reply = intermediateReply.join('');
const reply = this.streamHandler.tokens.join('');
logger.debug(
'[OpenAIClient] chatCompletion: using intermediateReply due to empty message.content',
{ intermediateReply: reply },
@ -1391,8 +1427,18 @@ ${convo}
return reply;
}
if (reasoningTokens.length > 0 && this.options.context !== 'title') {
return reasoningTokens.join('') + message.content;
if (
this.streamHandler.reasoningTokens.length > 0 &&
this.options.context !== 'title' &&
!message.content.startsWith('<think>')
) {
return this.getStreamText();
} else if (
this.streamHandler.reasoningTokens.length > 0 &&
this.options.context !== 'title' &&
message.content.startsWith('<think>')
) {
return message.content.replace('<think>', ':::thinking').replace('</think>', ':::');
}
return message.content;

View file

@ -1,5 +1,4 @@
const OpenAIClient = require('./OpenAIClient');
const { CacheKeys, Time } = require('librechat-data-provider');
const { CallbackManager } = require('@langchain/core/callbacks/manager');
const { BufferMemory, ChatMessageHistory } = require('langchain/memory');
const { addImages, buildErrorInput, buildPromptPrefix } = require('./output_parsers');
@ -11,7 +10,6 @@ const checkBalance = require('~/models/checkBalance');
const { isEnabled } = require('~/server/utils');
const { extractBaseURL } = require('~/utils');
const { loadTools } = require('./tools/util');
const { getLogStores } = require('~/cache');
const { logger } = require('~/config');
class PluginsClient extends OpenAIClient {
@ -256,17 +254,6 @@ class PluginsClient extends OpenAIClient {
}
this.responsePromise = this.saveMessageToDatabase(responseMessage, saveOptions, user);
if (responseMessage.text) {
const messageCache = getLogStores(CacheKeys.MESSAGES);
messageCache.set(
responseMessage.messageId,
{
text: responseMessage.text,
complete: true,
},
Time.FIVE_MINUTES,
);
}
delete responseMessage.tokenCount;
return { ...responseMessage, ...result };
}