Mirror of https://github.com/danny-avila/LibreChat.git (synced 2026-01-03 09:08:52 +01:00)
🏄‍♂️ refactor: Optimize Reasoning UI & Token Streaming (#5546)
* ✨ feat: Implement Show Thinking feature; refactor: testing thinking render optimizations
* ✨ feat: Refactor Thinking component styles and enhance Markdown rendering
* chore: add back removed code, revert type changes
* chore: Add back resetCounter effect to Markdown component for improved code block indexing
* chore: bump @librechat/agents and google langchain packages
* WIP: reasoning type updates
* WIP: first pass, reasoning content blocks
* chore: revert code
* chore: bump @librechat/agents
* refactor: optimize reasoning tag handling
* style: ul indent padding
* feat: add Reasoning component to handle reasoning display
* feat: first pass, content reasoning part styling
* refactor: add content placeholder for endpoints using new stream handler
* refactor: only cache messages when requesting stream audio
* fix: circular dep.
* fix: add default param
* refactor: tts, only request after message stream, fix chrome autoplay
* style: update label for submitting state and add localization for 'Thinking...'
* fix: improve global audio pause logic and reset active run ID
* fix: handle artifact edge cases
* fix: remove unnecessary console log from artifact update test
* feat: add support for continued message handling with new streaming method

---------

Co-authored-by: Marco Beretta <81851188+berry-13@users.noreply.github.com>
parent d60a149ad9
commit 591a019766
48 changed files with 1791 additions and 726 deletions
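For orientation before the diff: the new streaming path accumulates reasoning tokens separately from content tokens and, once the run completes, wraps the reasoning in a `:::thinking` directive that the UI renders as a collapsible Thinking block. A minimal sketch of the assembled text, mirroring the getStreamText() hunk further down (the sample strings are illustrative):

```js
// Minimal sketch of the final message text, mirroring getStreamText() in the diff below.
// `reasoningTokens` and `tokens` stand in for the SplitStreamHandler's accumulated arrays.
const reasoningTokens = ['First, check what the user asked for.'];
const tokens = ['Here is the answer.'];

const reasoningBlock =
  reasoningTokens.length > 0 ? `:::thinking\n${reasoningTokens.join('')}\n:::\n` : '';
const text = `${reasoningBlock}${tokens.join('')}`;
// => ":::thinking\nFirst, check what the user asked for.\n:::\nHere is the answer."
```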
BaseClient.js

@@ -7,15 +7,12 @@ const {
  EModelEndpoint,
  ErrorTypes,
  Constants,
  CacheKeys,
  Time,
} = require('librechat-data-provider');
const { getMessages, saveMessage, updateMessage, saveConvo } = require('~/models');
const { addSpaceIfNeeded, isEnabled } = require('~/server/utils');
const { truncateToolCallOutputs } = require('./prompts');
const checkBalance = require('~/models/checkBalance');
const { getFiles } = require('~/models/File');
const { getLogStores } = require('~/cache');
const TextStream = require('./TextStream');
const { logger } = require('~/config');
@@ -54,6 +51,12 @@ class BaseClient {
    this.outputTokensKey = 'completion_tokens';
    /** @type {Set<string>} */
    this.savedMessageIds = new Set();
    /**
     * Flag to determine if the client re-submitted the latest assistant message.
     * @type {boolean | undefined} */
    this.continued;
    /** @type {TMessage[]} */
    this.currentMessages = [];
  }

  setOptions() {
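The new savedMessageIds set gives the client a way to skip writes for messages it has already persisted. The actual call sites are outside this hunk, so the guard below is an illustration of the intended use, not the committed code:

```js
// Illustration only: skip saving a message that was already persisted in this run.
if (!this.savedMessageIds.has(responseMessage.messageId)) {
  this.responsePromise = this.saveMessageToDatabase(responseMessage, saveOptions, user);
  this.savedMessageIds.add(responseMessage.messageId);
}
```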
@@ -589,6 +592,7 @@ class BaseClient {
      } else {
        latestMessage.text = generation;
      }
      this.continued = true;
    } else {
      this.currentMessages.push(userMessage);
    }
@@ -720,17 +724,6 @@

    this.responsePromise = this.saveMessageToDatabase(responseMessage, saveOptions, user);
    this.savedMessageIds.add(responseMessage.messageId);
    if (responseMessage.text) {
      const messageCache = getLogStores(CacheKeys.MESSAGES);
      messageCache.set(
        responseMessageId,
        {
          text: responseMessage.text,
          complete: true,
        },
        Time.FIVE_MINUTES,
      );
    }
    delete responseMessage.tokenCount;
    return responseMessage;
  }
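Per the commit notes ('only cache messages when requesting stream audio'), the unconditional cache write shown above moves behind a TTS-related condition. A hedged sketch of that gating follows; `shouldCacheForTTS` is a stand-in flag, not the actual option name:

```js
// Sketch: cache the completed text for audio streaming only when it will be requested.
// `shouldCacheForTTS` is a hypothetical flag standing in for the real condition.
if (shouldCacheForTTS && responseMessage.text) {
  const messageCache = getLogStores(CacheKeys.MESSAGES);
  messageCache.set(
    responseMessage.messageId,
    { text: responseMessage.text, complete: true },
    Time.FIVE_MINUTES,
  );
}
```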
OpenAIClient.js

@@ -1,6 +1,7 @@
const OpenAI = require('openai');
const { OllamaClient } = require('./OllamaClient');
const { HttpsProxyAgent } = require('https-proxy-agent');
const { SplitStreamHandler, GraphEvents } = require('@librechat/agents');
const {
  Constants,
  ImageDetail,
@@ -28,17 +29,17 @@ const {
  createContextHandlers,
} = require('./prompts');
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
const { addSpaceIfNeeded, isEnabled, sleep } = require('~/server/utils');
const Tokenizer = require('~/server/services/Tokenizer');
const { spendTokens } = require('~/models/spendTokens');
const { isEnabled, sleep } = require('~/server/utils');
const { handleOpenAIErrors } = require('./tools/util');
const { createLLM, RunManager } = require('./llm');
const { logger, sendEvent } = require('~/config');
const ChatGPTClient = require('./ChatGPTClient');
const { summaryBuffer } = require('./memory');
const { runTitleChain } = require('./chains');
const { tokenSplit } = require('./document');
const BaseClient = require('./BaseClient');
const { logger } = require('~/config');

class OpenAIClient extends BaseClient {
  constructor(apiKey, options = {}) {
@@ -65,6 +66,8 @@ class OpenAIClient extends BaseClient {
    this.usage;
    /** @type {boolean|undefined} */
    this.isO1Model;
    /** @type {SplitStreamHandler | undefined} */
    this.streamHandler;
  }

  // TODO: PluginsClient calls this 3x, unneeded
@@ -1064,11 +1067,36 @@ ${convo}
    });
  }

  getStreamText() {
    if (!this.streamHandler) {
      return '';
    }

    const reasoningTokens =
      this.streamHandler.reasoningTokens.length > 0
        ? `:::thinking\n${this.streamHandler.reasoningTokens.join('')}\n:::\n`
        : '';

    return `${reasoningTokens}${this.streamHandler.tokens.join('')}`;
  }

  getMessageMapMethod() {
    /**
     * @param {TMessage} msg
     */
    return (msg) => {
      if (msg.text != null && msg.text && msg.text.startsWith(':::thinking')) {
        msg.text = msg.text.replace(/:::thinking.*?:::/gs, '').trim();
      }

      return msg;
    };
  }

  async chatCompletion({ payload, onProgress, abortController = null }) {
    let error = null;
    let intermediateReply = [];
    const errorCallback = (err) => (error = err);
    const intermediateReply = [];
    const reasoningTokens = [];
    try {
      if (!abortController) {
        abortController = new AbortController();
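To make the getMessageMapMethod() change concrete: before stored messages are mapped into the next API payload, a leading `:::thinking` block is stripped so reasoning text is not re-submitted as conversation context. A small self-contained example of that transformation:

```js
// Example of the :::thinking stripping performed by getMessageMapMethod().
const msg = { text: ':::thinking\nFirst, check the request.\n:::\nHere is the answer.' };

if (msg.text != null && msg.text.startsWith(':::thinking')) {
  msg.text = msg.text.replace(/:::thinking.*?:::/gs, '').trim();
}

console.log(msg.text); // "Here is the answer."
```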
@@ -1266,6 +1294,19 @@ ${convo}
        reasoningKey = 'reasoning';
      }

      this.streamHandler = new SplitStreamHandler({
        reasoningKey,
        accumulate: true,
        runId: this.responseMessageId,
        handlers: {
          [GraphEvents.ON_RUN_STEP]: (event) => sendEvent(this.options.res, event),
          [GraphEvents.ON_MESSAGE_DELTA]: (event) => sendEvent(this.options.res, event),
          [GraphEvents.ON_REASONING_DELTA]: (event) => sendEvent(this.options.res, event),
        },
      });

      intermediateReply = this.streamHandler.tokens;

      if (modelOptions.stream) {
        streamPromise = new Promise((resolve) => {
          streamResolve = resolve;
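For context, a rough sketch of how the handler configured above splits a single OpenAI-style delta stream into content and reasoning; it assumes only the SplitStreamHandler surface visible in this diff (reasoningKey, accumulate, runId, handlers, handle(), tokens, reasoningTokens):

```js
// Rough sketch; only the SplitStreamHandler surface shown in this diff is assumed.
const { SplitStreamHandler, GraphEvents } = require('@librechat/agents');

const handler = new SplitStreamHandler({
  reasoningKey: 'reasoning_content',
  accumulate: true,
  runId: 'response-message-id',
  handlers: {
    [GraphEvents.ON_MESSAGE_DELTA]: (event) => console.log('content delta', event),
    [GraphEvents.ON_REASONING_DELTA]: (event) => console.log('reasoning delta', event),
  },
});

// OpenAI-style chunks go through handle(); reasoning and content accumulate separately.
handler.handle({ choices: [{ delta: { reasoning_content: 'Weighing the options. ' } }] });
handler.handle({ choices: [{ delta: { content: 'Final answer.' } }] });

// Expected, per the accessors used in this diff:
// handler.reasoningTokens.join('') -> 'Weighing the options. '
// handler.tokens.join('')          -> 'Final answer.'
```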
@@ -1292,41 +1333,36 @@ ${convo}
          }

          if (typeof finalMessage.content !== 'string' || finalMessage.content.trim() === '') {
            finalChatCompletion.choices[0].message.content = intermediateReply.join('');
            finalChatCompletion.choices[0].message.content = this.streamHandler.tokens.join('');
          }
        })
        .on('finalMessage', (message) => {
          if (message?.role !== 'assistant') {
            stream.messages.push({ role: 'assistant', content: intermediateReply.join('') });
            stream.messages.push({
              role: 'assistant',
              content: this.streamHandler.tokens.join(''),
            });
            UnexpectedRoleError = true;
          }
        });

      let reasoningCompleted = false;
      if (this.continued === true) {
        const latestText = addSpaceIfNeeded(
          this.currentMessages[this.currentMessages.length - 1]?.text ?? '',
        );
        this.streamHandler.handle({
          choices: [
            {
              delta: {
                content: latestText,
              },
            },
          ],
        });
      }

      for await (const chunk of stream) {
        if (chunk?.choices?.[0]?.delta?.[reasoningKey]) {
          if (reasoningTokens.length === 0) {
            const thinkingDirective = '<think>\n';
            intermediateReply.push(thinkingDirective);
            reasoningTokens.push(thinkingDirective);
            onProgress(thinkingDirective);
          }
          const reasoning_content = chunk?.choices?.[0]?.delta?.[reasoningKey] || '';
          intermediateReply.push(reasoning_content);
          reasoningTokens.push(reasoning_content);
          onProgress(reasoning_content);
        }

        const token = chunk?.choices?.[0]?.delta?.content || '';
        if (!reasoningCompleted && reasoningTokens.length > 0 && token) {
          reasoningCompleted = true;
          const separatorTokens = '\n</think>\n';
          reasoningTokens.push(separatorTokens);
          onProgress(separatorTokens);
        }

        intermediateReply.push(token);
        onProgress(token);
        this.streamHandler.handle(chunk);
        if (abortController.signal.aborted) {
          stream.controller.abort();
          break;
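The loop above shows the two wrapping schemes side by side: the older path emitted inline `<think>` / `</think>` tags as reasoning streamed, while the new path feeds every chunk to the SplitStreamHandler and lets getStreamText() reassemble the result with the `:::thinking` directive. The difference in the assembled text, roughly (sample strings only):

```js
// Shape of the assembled text under each approach in this hunk (illustrative strings).
const legacyInlineTags = '<think>\nWeighing the options.\n</think>\nFinal answer.';
const directiveStyle = ':::thinking\nWeighing the options.\n:::\nFinal answer.';
```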
@@ -1369,7 +1405,7 @@ ${convo}

    if (!Array.isArray(choices) || choices.length === 0) {
      logger.warn('[OpenAIClient] Chat completion response has no choices');
      return intermediateReply.join('');
      return this.streamHandler.tokens.join('');
    }

    const { message, finish_reason } = choices[0] ?? {};
@@ -1379,11 +1415,11 @@ ${convo}

    if (!message) {
      logger.warn('[OpenAIClient] Message is undefined in chatCompletion response');
      return intermediateReply.join('');
      return this.streamHandler.tokens.join('');
    }

    if (typeof message.content !== 'string' || message.content.trim() === '') {
      const reply = intermediateReply.join('');
      const reply = this.streamHandler.tokens.join('');
      logger.debug(
        '[OpenAIClient] chatCompletion: using intermediateReply due to empty message.content',
        { intermediateReply: reply },
@@ -1391,8 +1427,18 @@ ${convo}
      return reply;
    }

    if (reasoningTokens.length > 0 && this.options.context !== 'title') {
      return reasoningTokens.join('') + message.content;
    if (
      this.streamHandler.reasoningTokens.length > 0 &&
      this.options.context !== 'title' &&
      !message.content.startsWith('<think>')
    ) {
      return this.getStreamText();
    } else if (
      this.streamHandler.reasoningTokens.length > 0 &&
      this.options.context !== 'title' &&
      message.content.startsWith('<think>')
    ) {
      return message.content.replace('<think>', ':::thinking').replace('</think>', ':::');
    }

    return message.content;
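When a provider returns reasoning already wrapped in `<think>` tags, the second branch above rewrites the tags to the `:::thinking` directive instead of prepending a duplicate copy of the reasoning. For example:

```js
// Example of the <think> -> :::thinking normalization in the branch above.
const content = '<think>\nWeighing the options.\n</think>\nFinal answer.';
const normalized = content.replace('<think>', ':::thinking').replace('</think>', ':::');
// => ":::thinking\nWeighing the options.\n:::\nFinal answer."
```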
PluginsClient.js

@@ -1,5 +1,4 @@
const OpenAIClient = require('./OpenAIClient');
const { CacheKeys, Time } = require('librechat-data-provider');
const { CallbackManager } = require('@langchain/core/callbacks/manager');
const { BufferMemory, ChatMessageHistory } = require('langchain/memory');
const { addImages, buildErrorInput, buildPromptPrefix } = require('./output_parsers');
@@ -11,7 +10,6 @@ const checkBalance = require('~/models/checkBalance');
const { isEnabled } = require('~/server/utils');
const { extractBaseURL } = require('~/utils');
const { loadTools } = require('./tools/util');
const { getLogStores } = require('~/cache');
const { logger } = require('~/config');

class PluginsClient extends OpenAIClient {
@@ -256,17 +254,6 @@ class PluginsClient extends OpenAIClient {
    }

    this.responsePromise = this.saveMessageToDatabase(responseMessage, saveOptions, user);
    if (responseMessage.text) {
      const messageCache = getLogStores(CacheKeys.MESSAGES);
      messageCache.set(
        responseMessage.messageId,
        {
          text: responseMessage.text,
          complete: true,
        },
        Time.FIVE_MINUTES,
      );
    }
    delete responseMessage.tokenCount;
    return { ...responseMessage, ...result };
  }