LibreChat/api/app/clients/OpenAIClient.js
Danny Avila 29ef91b4dd
🧠 feat: User Memories for Conversational Context (#7760)
* 🧠 feat: User Memories for Conversational Context

chore: mcp typing, use `t`

WIP: first pass, Memories UI

- Added MemoryViewer component for displaying, editing, and deleting user memories.
- Integrated data provider hooks for fetching, updating, and deleting memories.
- Implemented pagination and loading states for better user experience.
- Created unit tests for MemoryViewer to ensure functionality and interaction with data provider.
- Updated translation files to include new UI strings related to memories.

chore: move mcp-related files to own directory

chore: rename librechat-mcp to librechat-api

WIP: first pass, memory processing and data schemas

chore: linting in fileSearch.js query description

chore: rename librechat-api to @librechat/api across the project

WIP: first pass, functional memory agent

feat: add MemoryEditDialog and MemoryViewer components for managing user memories

- Introduced MemoryEditDialog for editing memory entries with validation and toast notifications.
- Updated MemoryViewer to support editing and deleting memories, including pagination and loading states.
- Enhanced data provider to handle memory updates with optional original key for better management.
- Added new localization strings for memory-related UI elements.

feat: add memory permissions management

- Implemented memory permissions in the backend, allowing roles to have specific permissions for using, creating, updating, and reading memories.
- Added new API endpoints for updating memory permissions associated with roles.
- Created a new AdminSettings component for managing memory permissions in the frontend.
- Integrated memory permissions into the existing roles and permissions schemas.
- Updated the interface to include memory settings and permissions.
- Enhanced the MemoryViewer component to conditionally render admin settings based on user roles.
- Added localization support for memory permissions in the translation files.

feat: move AdminSettings component to a new position in MemoryViewer for better visibility

refactor: clean up commented code in MemoryViewer component

feat: enhance MemoryViewer with search functionality and improve MemoryEditDialog integration

- Added a search input to filter memories in the MemoryViewer component.
- Refactored MemoryEditDialog to accept children for better customization.
- Updated MemoryViewer to utilize the new EditMemoryButton and DeleteMemoryButton components for editing and deleting memories.
- Improved localization support by adding new strings for memory filtering and deletion confirmation.

refactor: optimize memory filtering in MemoryViewer using match-sorter

- Replaced manual filtering logic with match-sorter for improved search functionality.
- Enhanced performance and readability of the filteredMemories computation.

feat: enhance MemoryEditDialog with triggerRef and improve updateMemory mutation handling

feat: implement access control for MemoryEditDialog and MemoryViewer components

refactor: remove commented out code and create runMemory method

refactor: rename role based files

feat: implement access control for memory usage in AgentClient

refactor: simplify checkVisionRequest method in AgentClient by removing commented-out code

refactor: make `agents` dir in api package

refactor: migrate Azure utilities to TypeScript and consolidate imports

refactor: move sanitizeFilename function to a new file and update imports, add related tests

refactor: update LLM configuration types and consolidate Azure options in the API package

chore: linting

chore: import order

refactor: replace getLLMConfig with getOpenAIConfig and remove unused LLM configuration file

chore: update winston-daily-rotate-file to version 5.0.0 and add object-hash dependency in package-lock.json

refactor: move primeResources and optionalChainWithEmptyCheck functions to resources.ts and update imports

refactor: move createRun function to a new run.ts file and update related imports

fix: ensure safeAttachments is correctly typed as an array of TFile

chore: add node-fetch dependency and refactor fetch-related functions into packages/api/utils, removing the old generators file

refactor: enhance TEndpointOption type by using Pick to streamline endpoint fields and add new properties for model parameters and client options

feat: implement initializeOpenAIOptions function and update OpenAI types for enhanced configuration handling

fix: update types due to new TEndpointOption typing

fix: ensure safe access to group parameters in initializeOpenAIOptions function

fix: remove redundant API key validation comment in initializeOpenAIOptions function

refactor: rename initializeOpenAIOptions to initializeOpenAI for consistency and update related documentation

refactor: decouple req.body fields and tool loading from initializeAgentOptions

chore: linting

refactor: adjust column widths in MemoryViewer for improved layout

refactor: simplify agent initialization by creating loadAgent function and removing unused code

feat: add memory configuration loading and validation functions

WIP: first pass, memory processing with config

feat: implement memory callback and artifact handling

feat: implement memory artifacts display and processing updates

feat: add memory configuration options and schema validation for validKeys

fix: update MemoryEditDialog and MemoryViewer to handle memory state and display improvements

refactor: remove padding from BookmarkTable and MemoryViewer headers for consistent styling

WIP: initial tokenLimit config and move Tokenizer to @librechat/api

refactor: update mongoMeili plugin methods to use callback for better error handling

feat: enhance memory management with token tracking and usage metrics

- Added token counting for memory entries to enforce limits and provide usage statistics.
- Updated memory retrieval and update routes to include total token usage and limit.
- Enhanced MemoryEditDialog and MemoryViewer components to display memory usage and token information.
- Refactored memory processing functions to handle token limits and provide feedback on memory capacity.

feat: implement memory artifact handling in attachment handler

- Enhanced useAttachmentHandler to process memory artifacts when receiving updates.
- Introduced handleMemoryArtifact utility to manage memory updates and deletions.
- Updated query client to reflect changes in memory state based on incoming data.

refactor: restructure web search key extraction logic

- Moved the logic for extracting API keys from the webSearchAuth configuration into a dedicated function, getWebSearchKeys.
- Updated webSearchKeys to utilize the new function for improved clarity and maintainability.
- Prevents build time errors

feat: add personalization settings and memory preferences management

- Introduced a new Personalization tab in settings to manage user memory preferences.
- Implemented API endpoints and client-side logic for updating memory preferences.
- Enhanced user interface components to reflect personalization options and memory usage.
- Updated permissions to allow users to opt out of memory features.
- Added localization support for new settings and messages related to personalization.

style: personalization switch class

feat: add PersonalizationIcon and align Side Panel UI

feat: implement memory creation functionality

- Added a new API endpoint for creating memory entries, including validation for key and value.
- Introduced MemoryCreateDialog component for user interface to facilitate memory creation.
- Integrated token limit checks to prevent exceeding user memory capacity.
- Updated MemoryViewer to include a button for opening the memory creation dialog.
- Enhanced localization support for new messages related to memory creation.

feat: enhance message processing with configurable window size

- Updated AgentClient to use a configurable message window size for processing messages.
- Introduced messageWindowSize option in memory configuration schema with a default value of 5.
- Improved logic for selecting messages to process based on the configured window size.

chore: update librechat-data-provider version to 0.7.87 in package.json and package-lock.json

chore: remove OpenAPIPlugin and its associated tests

chore: remove MIGRATION_README.md as migration tasks are completed

ci: fix backend tests

chore: remove unused translation keys from localization file

chore: remove problematic test file and unused var in AgentClient

chore: remove unused import and import directly for JSDoc

* feat: add api package build stage in Dockerfile for improved modularity

* docs: reorder build steps in contributing guide for clarity
2025-06-07 18:52:22 -04:00


const { OllamaClient } = require('./OllamaClient');
const { HttpsProxyAgent } = require('https-proxy-agent');
const { SplitStreamHandler, CustomOpenAIClient: OpenAI } = require('@librechat/agents');
const {
isEnabled,
Tokenizer,
createFetch,
constructAzureURL,
genAzureChatCompletion,
createStreamEventHandlers,
} = require('@librechat/api');
const {
Constants,
ImageDetail,
ContentTypes,
parseTextParts,
EModelEndpoint,
resolveHeaders,
KnownEndpoints,
openAISettings,
ImageDetailCost,
CohereConstants,
getResponseSender,
validateVisionModel,
mapModelToAzureConfig,
} = require('librechat-data-provider');
const {
truncateText,
formatMessage,
CUT_OFF_PROMPT,
titleInstruction,
createContextHandlers,
} = require('./prompts');
const { extractBaseURL, getModelMaxTokens, getModelMaxOutputTokens } = require('~/utils');
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
const { addSpaceIfNeeded, sleep } = require('~/server/utils');
const { spendTokens } = require('~/models/spendTokens');
const { handleOpenAIErrors } = require('./tools/util');
const { createLLM, RunManager } = require('./llm');
const ChatGPTClient = require('./ChatGPTClient');
const { summaryBuffer } = require('./memory');
const { runTitleChain } = require('./chains');
const { tokenSplit } = require('./document');
const BaseClient = require('./BaseClient');
const { logger } = require('~/config');
class OpenAIClient extends BaseClient {
constructor(apiKey, options = {}) {
super(apiKey, options);
this.ChatGPTClient = new ChatGPTClient();
this.buildPrompt = this.ChatGPTClient.buildPrompt.bind(this);
/** @type {getCompletion} */
this.getCompletion = this.ChatGPTClient.getCompletion.bind(this);
/** @type {cohereChatCompletion} */
this.cohereChatCompletion = this.ChatGPTClient.cohereChatCompletion.bind(this);
this.contextStrategy = options.contextStrategy
? options.contextStrategy.toLowerCase()
: 'discard';
this.shouldSummarize = this.contextStrategy === 'summarize';
/** @type {AzureOptions} */
this.azure = options.azure || false;
this.setOptions(options);
this.metadata = {};
/** @type {string | undefined} - The API Completions URL */
this.completionsUrl;
/** @type {OpenAIUsageMetadata | undefined} */
this.usage;
/** @type {boolean|undefined} */
this.isOmni;
/** @type {SplitStreamHandler | undefined} */
this.streamHandler;
}
// TODO: PluginsClient calls this 3x, unneeded
setOptions(options) {
if (this.options && !this.options.replaceOptions) {
this.options.modelOptions = {
...this.options.modelOptions,
...options.modelOptions,
};
delete options.modelOptions;
this.options = {
...this.options,
...options,
};
} else {
this.options = options;
}
if (this.options.openaiApiKey) {
this.apiKey = this.options.openaiApiKey;
}
this.modelOptions = Object.assign(
{
model: openAISettings.model.default,
},
this.modelOptions,
this.options.modelOptions,
);
this.defaultVisionModel = this.options.visionModel ?? 'gpt-4-vision-preview';
if (typeof this.options.attachments?.then === 'function') {
this.options.attachments.then((attachments) => this.checkVisionRequest(attachments));
} else {
this.checkVisionRequest(this.options.attachments);
}
const omniPattern = /\b(o\d)\b/i;
this.isOmni = omniPattern.test(this.modelOptions.model);
const { OPENAI_FORCE_PROMPT } = process.env ?? {};
const { reverseProxyUrl: reverseProxy } = this.options;
if (
!this.useOpenRouter &&
((reverseProxy && reverseProxy.includes(KnownEndpoints.openrouter)) ||
(this.options.endpoint &&
this.options.endpoint.toLowerCase().includes(KnownEndpoints.openrouter)))
) {
this.useOpenRouter = true;
}
if (this.options.endpoint?.toLowerCase() === 'ollama') {
this.isOllama = true;
}
this.FORCE_PROMPT =
isEnabled(OPENAI_FORCE_PROMPT) ||
(reverseProxy && reverseProxy.includes('completions') && !reverseProxy.includes('chat'));
if (typeof this.options.forcePrompt === 'boolean') {
this.FORCE_PROMPT = this.options.forcePrompt;
}
if (this.azure && process.env.AZURE_OPENAI_DEFAULT_MODEL) {
this.azureEndpoint = genAzureChatCompletion(this.azure, this.modelOptions.model, this);
this.modelOptions.model = process.env.AZURE_OPENAI_DEFAULT_MODEL;
} else if (this.azure) {
this.azureEndpoint = genAzureChatCompletion(this.azure, this.modelOptions.model, this);
}
const { model } = this.modelOptions;
this.isChatCompletion =
omniPattern.test(model) || model.includes('gpt') || this.useOpenRouter || !!reverseProxy;
this.isChatGptModel = this.isChatCompletion;
if (
model.includes('text-davinci') ||
model.includes('gpt-3.5-turbo-instruct') ||
this.FORCE_PROMPT
) {
this.isChatCompletion = false;
this.isChatGptModel = false;
}
const { isChatGptModel } = this;
this.isUnofficialChatGptModel =
model.startsWith('text-chat') || model.startsWith('text-davinci-002-render');
this.maxContextTokens =
this.options.maxContextTokens ??
getModelMaxTokens(
model,
this.options.endpointType ?? this.options.endpoint,
this.options.endpointTokenConfig,
) ??
4095; // 1 less than maximum
if (this.shouldSummarize) {
this.maxContextTokens = Math.floor(this.maxContextTokens / 2);
}
if (this.options.debug) {
logger.debug('[OpenAIClient] maxContextTokens', this.maxContextTokens);
}
this.maxResponseTokens =
this.modelOptions.max_tokens ??
getModelMaxOutputTokens(
model,
this.options.endpointType ?? this.options.endpoint,
this.options.endpointTokenConfig,
) ??
1024;
this.maxPromptTokens =
this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens;
if (this.maxPromptTokens + this.maxResponseTokens > this.maxContextTokens) {
throw new Error(
`maxPromptTokens + max_tokens (${this.maxPromptTokens} + ${this.maxResponseTokens} = ${
this.maxPromptTokens + this.maxResponseTokens
}) must be less than or equal to maxContextTokens (${this.maxContextTokens})`,
);
}
this.sender =
this.options.sender ??
getResponseSender({
model: this.modelOptions.model,
endpoint: this.options.endpoint,
endpointType: this.options.endpointType,
modelDisplayLabel: this.options.modelDisplayLabel,
chatGptLabel: this.options.chatGptLabel || this.options.modelLabel,
});
this.userLabel = this.options.userLabel || 'User';
this.chatGptLabel = this.options.chatGptLabel || 'Assistant';
this.setupTokens();
if (reverseProxy) {
this.completionsUrl = reverseProxy;
this.langchainProxy = extractBaseURL(reverseProxy);
} else if (isChatGptModel) {
this.completionsUrl = 'https://api.openai.com/v1/chat/completions';
} else {
this.completionsUrl = 'https://api.openai.com/v1/completions';
}
if (this.azureEndpoint) {
this.completionsUrl = this.azureEndpoint;
}
if (this.azureEndpoint && this.options.debug) {
logger.debug('Using Azure endpoint');
}
return this;
}
/**
*
* Checks if the model is a vision model based on request attachments and sets the appropriate options:
* - Keeps the current model if it is already a valid vision model; otherwise switches
* `this.modelOptions.model` to an available vision model, falling back to `this.defaultVisionModel`.
* - Sets `this.isVisionModel` to `true` for vision requests.
* - Deletes `this.modelOptions.stop` for vision requests.
* @param {MongoFile[]} attachments
*/
checkVisionRequest(attachments) {
if (!attachments) {
return;
}
const availableModels = this.options.modelsConfig?.[this.options.endpoint];
if (!availableModels) {
return;
}
let visionRequestDetected = false;
for (const file of attachments) {
if (file?.type?.includes('image')) {
visionRequestDetected = true;
break;
}
}
if (!visionRequestDetected) {
return;
}
this.isVisionModel = validateVisionModel({ model: this.modelOptions.model, availableModels });
if (this.isVisionModel) {
delete this.modelOptions.stop;
return;
}
for (const model of availableModels) {
if (!validateVisionModel({ model, availableModels })) {
continue;
}
this.modelOptions.model = model;
this.isVisionModel = true;
delete this.modelOptions.stop;
return;
}
if (!availableModels.includes(this.defaultVisionModel)) {
return;
}
if (!validateVisionModel({ model: this.defaultVisionModel, availableModels })) {
return;
}
this.modelOptions.model = this.defaultVisionModel;
this.isVisionModel = true;
delete this.modelOptions.stop;
}
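/** Sets the prompt start/end tokens used when building legacy prompts and filtering streamed tokens */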
setupTokens() {
if (this.isChatCompletion) {
this.startToken = '||>';
this.endToken = '';
} else if (this.isUnofficialChatGptModel) {
this.startToken = '<|im_start|>';
this.endToken = '<|im_end|>';
} else {
this.startToken = '||>';
this.endToken = '';
}
}
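/**
* Selects the tokenizer encoding for the current model:
* `o200k_base` for `gpt-4` models followed by a non-hyphen character (e.g. `gpt-4o`),
* otherwise `cl100k_base`.
* @returns {'o200k_base' | 'cl100k_base'}
*/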
getEncoding() {
return this.modelOptions?.model && /gpt-4[^-\s]/.test(this.modelOptions.model)
? 'o200k_base'
: 'cl100k_base';
}
/**
* Returns the token count of a given text using the encoding selected for the current model.
* @param {string} text - The text to get the token count for.
* @returns {number} The token count of the given text.
*/
getTokenCount(text) {
const encoding = this.getEncoding();
return Tokenizer.getTokenCount(text, encoding);
}
/**
* Calculate the token cost for an image based on its dimensions and detail level.
*
* @param {Object} image - The image object.
* @param {number} image.width - The width of the image.
* @param {number} image.height - The height of the image.
* @param {'low'|'high'|string|undefined} [image.detail] - The detail level ('low', 'high', or other).
* @returns {number} The calculated token cost.
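* @example
* // Illustrative: a 1024x1024 image at the default ('high') detail spans
* // Math.ceil(1024 / 512) * Math.ceil(1024 / 512) = 4 squares, so the cost is
* // 4 * ImageDetailCost.HIGH + ImageDetailCost.ADDITIONAL tokens.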
*/
calculateImageTokenCost({ width, height, detail }) {
if (detail === 'low') {
return ImageDetailCost.LOW;
}
// Calculate the number of 512px squares
const numSquares = Math.ceil(width / 512) * Math.ceil(height / 512);
// Default to high detail cost calculation
return numSquares * ImageDetailCost.HIGH + ImageDetailCost.ADDITIONAL;
}
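/** Returns the conversation/preset fields to persist along with the current model options */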
getSaveOptions() {
return {
artifacts: this.options.artifacts,
maxContextTokens: this.options.maxContextTokens,
chatGptLabel: this.options.chatGptLabel,
promptPrefix: this.options.promptPrefix,
resendFiles: this.options.resendFiles,
imageDetail: this.options.imageDetail,
modelLabel: this.options.modelLabel,
iconURL: this.options.iconURL,
greeting: this.options.greeting,
spec: this.options.spec,
...this.modelOptions,
};
}
getBuildMessagesOptions(opts) {
return {
isChatCompletion: this.isChatCompletion,
promptPrefix: opts.promptPrefix,
abortController: opts.abortController,
};
}
/**
*
* Encodes image attachments, adds the resulting image URLs to the message, and returns the files.
*
* @param {TMessage} message
* @param {MongoFile[]} attachments
* @returns {Promise<MongoFile[]>}
*/
async addImageURLs(message, attachments) {
const { files, image_urls } = await encodeAndFormat(
this.options.req,
attachments,
this.options.endpoint,
);
message.image_urls = image_urls.length ? image_urls : undefined;
return files;
}
async buildMessages(
messages,
parentMessageId,
{ isChatCompletion = false, promptPrefix = null },
opts,
) {
let orderedMessages = this.constructor.getMessagesForConversation({
messages,
parentMessageId,
summary: this.shouldSummarize,
});
if (!isChatCompletion) {
return await this.buildPrompt(orderedMessages, {
isChatGptModel: isChatCompletion,
promptPrefix,
});
}
let payload;
let instructions;
let tokenCountMap;
let promptTokens;
promptPrefix = (promptPrefix || this.options.promptPrefix || '').trim();
if (typeof this.options.artifactsPrompt === 'string' && this.options.artifactsPrompt) {
promptPrefix = `${promptPrefix ?? ''}\n${this.options.artifactsPrompt}`.trim();
}
if (this.options.attachments) {
const attachments = await this.options.attachments;
if (this.message_file_map) {
this.message_file_map[orderedMessages[orderedMessages.length - 1].messageId] = attachments;
} else {
this.message_file_map = {
[orderedMessages[orderedMessages.length - 1].messageId]: attachments,
};
}
const files = await this.addImageURLs(
orderedMessages[orderedMessages.length - 1],
attachments,
);
this.options.attachments = files;
}
if (this.message_file_map) {
this.contextHandlers = createContextHandlers(
this.options.req,
orderedMessages[orderedMessages.length - 1].text,
);
}
const formattedMessages = orderedMessages.map((message, i) => {
const formattedMessage = formatMessage({
message,
userName: this.options?.name,
assistantName: this.options?.chatGptLabel,
});
const needsTokenCount = this.contextStrategy && !orderedMessages[i].tokenCount;
/* If tokens were never counted, or this is a vision request and the message has files, count again */
if (needsTokenCount || (this.isVisionModel && (message.image_urls || message.files))) {
orderedMessages[i].tokenCount = this.getTokenCountForMessage(formattedMessage);
}
/* If message has files, calculate image token cost */
if (this.message_file_map && this.message_file_map[message.messageId]) {
const attachments = this.message_file_map[message.messageId];
for (const file of attachments) {
if (file.embedded) {
this.contextHandlers?.processFile(file);
continue;
}
if (file.metadata?.fileIdentifier) {
continue;
}
orderedMessages[i].tokenCount += this.calculateImageTokenCost({
width: file.width,
height: file.height,
detail: this.options.imageDetail ?? ImageDetail.auto,
});
}
}
return formattedMessage;
});
if (this.contextHandlers) {
this.augmentedPrompt = await this.contextHandlers.createContext();
promptPrefix = this.augmentedPrompt + promptPrefix;
}
const noSystemModelRegex = /\b(o1-preview|o1-mini)\b/i.test(this.modelOptions.model);
if (promptPrefix && !noSystemModelRegex) {
promptPrefix = `Instructions:\n${promptPrefix.trim()}`;
instructions = {
role: 'system',
content: promptPrefix,
};
if (this.contextStrategy) {
instructions.tokenCount = this.getTokenCountForMessage(instructions);
}
}
// TODO: need to handle interleaving instructions better
if (this.contextStrategy) {
({ payload, tokenCountMap, promptTokens, messages } = await this.handleContextStrategy({
instructions,
orderedMessages,
formattedMessages,
}));
}
const result = {
prompt: payload,
promptTokens,
messages,
};
/** EXPERIMENTAL */
if (promptPrefix && noSystemModelRegex) {
const lastUserMessageIndex = payload.findLastIndex((message) => message.role === 'user');
if (lastUserMessageIndex !== -1) {
if (Array.isArray(payload[lastUserMessageIndex].content)) {
const firstTextPartIndex = payload[lastUserMessageIndex].content.findIndex(
(part) => part.type === ContentTypes.TEXT,
);
if (firstTextPartIndex !== -1) {
const firstTextPart = payload[lastUserMessageIndex].content[firstTextPartIndex];
payload[lastUserMessageIndex].content[firstTextPartIndex].text =
`${promptPrefix}\n${firstTextPart.text}`;
} else {
payload[lastUserMessageIndex].content.unshift({
type: ContentTypes.TEXT,
text: promptPrefix,
});
}
} else {
payload[lastUserMessageIndex].content =
`${promptPrefix}\n${payload[lastUserMessageIndex].content}`;
}
}
}
if (tokenCountMap) {
tokenCountMap.instructions = instructions?.tokenCount;
result.tokenCountMap = tokenCountMap;
}
if (promptTokens >= 0 && typeof opts?.getReqData === 'function') {
opts.getReqData({ promptTokens });
}
return result;
}
/** @type {sendCompletion} */
async sendCompletion(payload, opts = {}) {
let reply = '';
let result = null;
let streamResult = null;
this.modelOptions.user = this.user;
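/* Use the legacy completion path when the completions URL cannot be parsed as a base URL or when this is not a chat completion request */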
const invalidBaseUrl = this.completionsUrl && extractBaseURL(this.completionsUrl) === null;
const useOldMethod = !!(invalidBaseUrl || !this.isChatCompletion);
if (typeof opts.onProgress === 'function' && useOldMethod) {
const completionResult = await this.getCompletion(
payload,
(progressMessage) => {
if (progressMessage === '[DONE]') {
return;
}
if (progressMessage.choices) {
streamResult = progressMessage;
}
let token = null;
if (this.isChatCompletion) {
token =
progressMessage.choices?.[0]?.delta?.content ?? progressMessage.choices?.[0]?.text;
} else {
token = progressMessage.choices?.[0]?.text;
}
if (!token && this.useOpenRouter) {
token = progressMessage.choices?.[0]?.message?.content;
}
// first event's delta content is always undefined
if (!token) {
return;
}
if (token === this.endToken) {
return;
}
opts.onProgress(token);
reply += token;
},
opts.onProgress,
opts.abortController || new AbortController(),
);
if (completionResult && typeof completionResult === 'string') {
reply = completionResult;
} else if (
completionResult &&
typeof completionResult === 'object' &&
Array.isArray(completionResult.choices)
) {
reply = completionResult.choices[0]?.text?.replace(this.endToken, '');
}
} else if (typeof opts.onProgress === 'function' || this.options.useChatCompletion) {
reply = await this.chatCompletion({
payload,
onProgress: opts.onProgress,
abortController: opts.abortController,
});
} else {
result = await this.getCompletion(
payload,
null,
opts.onProgress,
opts.abortController || new AbortController(),
);
if (result && typeof result === 'string') {
return result.trim();
}
logger.debug('[OpenAIClient] sendCompletion: result', { ...result });
if (this.isChatCompletion) {
reply = result.choices[0].message.content;
} else {
reply = result.choices[0].text.replace(this.endToken, '');
}
}
if (streamResult) {
const { finish_reason } = streamResult.choices[0];
this.metadata = { finish_reason };
}
return (reply ?? '').trim();
}
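/**
* Builds a LangChain LLM instance via `createLLM`, applying proxy, header, OpenRouter,
* and Azure settings, and wiring callbacks through a new `RunManager`.
* Used by the title and summarization chains.
*/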
initializeLLM({
model = openAISettings.model.default,
modelName,
temperature = 0.2,
max_tokens,
streaming,
context,
tokenBuffer,
initialMessageCount,
conversationId,
}) {
const modelOptions = {
modelName: modelName ?? model,
temperature,
user: this.user,
};
if (max_tokens) {
modelOptions.max_tokens = max_tokens;
}
const configOptions = {};
if (this.langchainProxy) {
configOptions.basePath = this.langchainProxy;
}
if (this.useOpenRouter) {
configOptions.basePath = 'https://openrouter.ai/api/v1';
configOptions.baseOptions = {
headers: {
'HTTP-Referer': 'https://librechat.ai',
'X-Title': 'LibreChat',
},
};
}
const { headers } = this.options;
if (headers && typeof headers === 'object' && !Array.isArray(headers)) {
configOptions.baseOptions = {
headers: resolveHeaders({
...headers,
...configOptions?.baseOptions?.headers,
}),
};
}
if (this.options.proxy) {
configOptions.httpAgent = new HttpsProxyAgent(this.options.proxy);
configOptions.httpsAgent = new HttpsProxyAgent(this.options.proxy);
}
const { req, res, debug } = this.options;
const runManager = new RunManager({ req, res, debug, abortController: this.abortController });
this.runManager = runManager;
const llm = createLLM({
modelOptions,
configOptions,
openAIApiKey: this.apiKey,
azure: this.azure,
streaming,
callbacks: runManager.createCallbacks({
context,
tokenBuffer,
conversationId: this.conversationId ?? conversationId,
initialMessageCount,
}),
});
return llm;
}
/**
* Generates a concise title for a conversation based on the user's input text and response.
* Uses either the specified title method or defaults to the OpenAI `functions` method (using LangChain).
* If the `functions` method fails, it falls back to the `completion` method,
* which involves sending a chat completion request with specific instructions for title generation.
*
* @param {Object} params - The parameters for the conversation title generation.
* @param {string} params.text - The user's input.
* @param {string} [params.conversationId] - The current conversationId, if not already defined on client initialization.
* @param {string} [params.responseText=''] - The AI's immediate response to the user.
*
* @returns {Promise<string | 'New Chat'>} A promise that resolves to the generated conversation title.
* In case of failure, it will return the default title, "New Chat".
*/
async titleConvo({ text, conversationId, responseText = '' }) {
this.conversationId = conversationId;
if (this.options.attachments) {
delete this.options.attachments;
}
let title = 'New Chat';
const convo = `||>User:
"${truncateText(text)}"
||>Response:
"${JSON.stringify(truncateText(responseText))}"`;
const { OPENAI_TITLE_MODEL } = process.env ?? {};
let model = this.options.titleModel ?? OPENAI_TITLE_MODEL ?? openAISettings.model.default;
if (model === Constants.CURRENT_MODEL) {
model = this.modelOptions.model;
}
const modelOptions = {
// TODO: remove the gpt fallback and make it specific to endpoint
model,
temperature: 0.2,
presence_penalty: 0,
frequency_penalty: 0,
max_tokens: 16,
};
/** @type {TAzureConfig | undefined} */
const azureConfig = this.options?.req?.app?.locals?.[EModelEndpoint.azureOpenAI];
const resetTitleOptions = !!(
(this.azure && azureConfig) ||
(azureConfig && this.options.endpoint === EModelEndpoint.azureOpenAI)
);
if (resetTitleOptions) {
const { modelGroupMap, groupMap } = azureConfig;
const {
azureOptions,
baseURL,
headers = {},
serverless,
} = mapModelToAzureConfig({
modelName: modelOptions.model,
modelGroupMap,
groupMap,
});
this.options.headers = resolveHeaders(headers);
this.options.reverseProxyUrl = baseURL ?? null;
this.langchainProxy = extractBaseURL(this.options.reverseProxyUrl);
this.apiKey = azureOptions.azureOpenAIApiKey;
const groupName = modelGroupMap[modelOptions.model].group;
this.options.addParams = azureConfig.groupMap[groupName].addParams;
this.options.dropParams = azureConfig.groupMap[groupName].dropParams;
this.options.forcePrompt = azureConfig.groupMap[groupName].forcePrompt;
this.azure = !serverless && azureOptions;
if (serverless === true) {
this.options.defaultQuery = azureOptions.azureOpenAIApiVersion
? { 'api-version': azureOptions.azureOpenAIApiVersion }
: undefined;
this.options.headers['api-key'] = this.apiKey;
}
}
const titleChatCompletion = async () => {
try {
modelOptions.model = model;
if (this.azure) {
modelOptions.model = process.env.AZURE_OPENAI_DEFAULT_MODEL ?? modelOptions.model;
this.azureEndpoint = genAzureChatCompletion(this.azure, modelOptions.model, this);
}
const instructionsPayload = [
{
role: this.options.titleMessageRole ?? (this.isOllama ? 'user' : 'system'),
content: `Please generate ${titleInstruction}
${convo}
||>Title:`,
},
];
const promptTokens = this.getTokenCountForMessage(instructionsPayload[0]);
let useChatCompletion = true;
if (this.options.reverseProxyUrl === CohereConstants.API_URL) {
useChatCompletion = false;
}
title = (
await this.sendPayload(instructionsPayload, {
modelOptions,
useChatCompletion,
context: 'title',
})
).replaceAll('"', '');
const completionTokens = this.getTokenCount(title);
await this.recordTokenUsage({ promptTokens, completionTokens, context: 'title' });
} catch (e) {
logger.error(
'[OpenAIClient] There was an issue generating the title with the completion method',
e,
);
}
};
if (this.options.titleMethod === 'completion') {
await titleChatCompletion();
logger.debug('[OpenAIClient] Convo Title: ' + title);
return title;
}
try {
this.abortController = new AbortController();
const llm = this.initializeLLM({
...modelOptions,
conversationId,
context: 'title',
tokenBuffer: 150,
});
title = await runTitleChain({ llm, text, convo, signal: this.abortController.signal });
} catch (e) {
if (e?.message?.toLowerCase()?.includes('abort')) {
logger.debug('[OpenAIClient] Aborted title generation');
return;
}
logger.error(
'[OpenAIClient] There was an issue generating title with LangChain, trying completion method...',
e,
);
await titleChatCompletion();
}
logger.debug('[OpenAIClient] Convo Title: ' + title);
return title;
}
/**
* Get stream usage as returned by this client's API response.
* @returns {OpenAIUsageMetadata} The stream usage object.
*/
getStreamUsage() {
if (
this.usage &&
typeof this.usage === 'object' &&
'completion_tokens_details' in this.usage &&
this.usage.completion_tokens_details &&
typeof this.usage.completion_tokens_details === 'object' &&
'reasoning_tokens' in this.usage.completion_tokens_details
) {
const outputTokens = Math.abs(
this.usage.completion_tokens_details.reasoning_tokens - this.usage[this.outputTokensKey],
);
return {
...this.usage.completion_tokens_details,
[this.inputTokensKey]: this.usage[this.inputTokensKey],
[this.outputTokensKey]: outputTokens,
};
}
return this.usage;
}
/**
* Calculates the correct token count for the current user message based on the token count map and API usage.
* Edge case: If the calculation results in a negative value, it returns the original estimate.
* If revisiting a conversation with a chat history entirely composed of token estimates,
* the cumulative token count going forward should become more accurate as the conversation progresses.
* @param {Object} params - The parameters for the calculation.
* @param {Record<string, number>} params.tokenCountMap - A map of message IDs to their token counts.
* @param {string} params.currentMessageId - The ID of the current message to calculate.
* @param {OpenAIUsageMetadata} params.usage - The usage object returned by the API.
* @returns {number} The correct token count for the current user message.
*/
calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage }) {
const originalEstimate = tokenCountMap[currentMessageId] || 0;
if (!usage || typeof usage[this.inputTokensKey] !== 'number') {
return originalEstimate;
}
tokenCountMap[currentMessageId] = 0;
const totalTokensFromMap = Object.values(tokenCountMap).reduce((sum, count) => {
const numCount = Number(count);
return sum + (isNaN(numCount) ? 0 : numCount);
}, 0);
const totalInputTokens = usage[this.inputTokensKey] ?? 0;
const currentMessageTokens = totalInputTokens - totalTokensFromMap;
return currentMessageTokens > 0 ? currentMessageTokens : originalEstimate;
}
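/**
* Summarizes `messagesToRefine` using the `summaryBuffer` chain, reducing or truncating
* the context first if it exceeds the summary model's token limit.
* Returns `{ summaryMessage, summaryTokenCount }`, or an empty object on failure or abort.
*/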
async summarizeMessages({ messagesToRefine, remainingContextTokens }) {
logger.debug('[OpenAIClient] Summarizing messages...');
let context = messagesToRefine;
let prompt;
// TODO: remove the gpt fallback and make it specific to endpoint
const { OPENAI_SUMMARY_MODEL = openAISettings.model.default } = process.env ?? {};
let model = this.options.summaryModel ?? OPENAI_SUMMARY_MODEL;
if (model === Constants.CURRENT_MODEL) {
model = this.modelOptions.model;
}
const maxContextTokens =
getModelMaxTokens(
model,
this.options.endpointType ?? this.options.endpoint,
this.options.endpointTokenConfig,
) ?? 4095; // 1 less than maximum
// 3 tokens for the assistant label, and 98 for the summarizer prompt (101)
let promptBuffer = 101;
/*
* Note: token counting here is to block summarization if it exceeds the spend; complete
* accuracy is not important. Actual spend will happen after successful summarization.
*/
const excessTokenCount = context.reduce(
(acc, message) => acc + message.tokenCount,
promptBuffer,
);
if (excessTokenCount > maxContextTokens) {
({ context } = await this.getMessagesWithinTokenLimit({
messages: context,
maxContextTokens,
}));
}
if (context.length === 0) {
logger.debug(
'[OpenAIClient] Summary context is empty, using latest message within token limit',
);
promptBuffer = 32;
const { text, ...latestMessage } = messagesToRefine[messagesToRefine.length - 1];
const splitText = await tokenSplit({
text,
chunkSize: Math.floor((maxContextTokens - promptBuffer) / 3),
});
const newText = `${splitText[0]}\n...[truncated]...\n${splitText[splitText.length - 1]}`;
prompt = CUT_OFF_PROMPT;
context = [
formatMessage({
message: {
...latestMessage,
text: newText,
},
userName: this.options?.name,
assistantName: this.options?.chatGptLabel,
}),
];
}
// TODO: We can accurately count the tokens here before handleChatModelStart
// by recreating the summary prompt (single message) to avoid LangChain handling
const initialPromptTokens = this.maxContextTokens - remainingContextTokens;
logger.debug('[OpenAIClient] initialPromptTokens', initialPromptTokens);
const llm = this.initializeLLM({
model,
temperature: 0.2,
context: 'summary',
tokenBuffer: initialPromptTokens,
});
try {
const summaryMessage = await summaryBuffer({
llm,
debug: this.options.debug,
prompt,
context,
formatOptions: {
userName: this.options?.name,
assistantName: this.options?.chatGptLabel ?? this.options?.modelLabel,
},
previous_summary: this.previous_summary?.summary,
signal: this.abortController.signal,
});
const summaryTokenCount = this.getTokenCountForMessage(summaryMessage);
if (this.options.debug) {
logger.debug('[OpenAIClient] summaryTokenCount', summaryTokenCount);
logger.debug(
`[OpenAIClient] Summarization complete: remainingContextTokens: ${remainingContextTokens}, after refining: ${
remainingContextTokens - summaryTokenCount
}`,
);
}
return { summaryMessage, summaryTokenCount };
} catch (e) {
if (e?.message?.toLowerCase()?.includes('abort')) {
logger.debug('[OpenAIClient] Aborted summarization');
const { run, runId } = this.runManager.getRunByConversationId(this.conversationId);
if (run && run.error) {
const { error } = run;
this.runManager.removeRun(runId);
throw new Error(error);
}
}
logger.error('[OpenAIClient] Error summarizing messages', e);
return {};
}
}
/**
* @param {object} params
* @param {number} params.promptTokens
* @param {number} params.completionTokens
* @param {OpenAIUsageMetadata} [params.usage]
* @param {string} [params.model]
* @param {string} [params.context='message']
* @returns {Promise<void>}
*/
async recordTokenUsage({ promptTokens, completionTokens, usage, context = 'message' }) {
await spendTokens(
{
context,
model: this.modelOptions.model,
conversationId: this.conversationId,
user: this.user ?? this.options.req.user?.id,
endpointTokenConfig: this.options.endpointTokenConfig,
},
{ promptTokens, completionTokens },
);
if (
usage &&
typeof usage === 'object' &&
'reasoning_tokens' in usage &&
typeof usage.reasoning_tokens === 'number'
) {
await spendTokens(
{
context: 'reasoning',
model: this.modelOptions.model,
conversationId: this.conversationId,
user: this.user ?? this.options.req.user?.id,
endpointTokenConfig: this.options.endpointTokenConfig,
},
{ completionTokens: usage.reasoning_tokens },
);
}
}
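/** Counts tokens for the response text as an assistant-role message */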
getTokenCountForResponse(response) {
return this.getTokenCountForMessage({
role: 'assistant',
content: response.text,
});
}
/**
*
* @param {string[]} [intermediateReply]
* @returns {string}
*/
getStreamText(intermediateReply) {
if (!this.streamHandler) {
return intermediateReply?.join('') ?? '';
}
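/* When the stream included `<think>...</think>` reasoning, convert it to a `:::thinking` block for the UI and append the accumulated completion tokens */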
let thinkMatch;
let remainingText;
let reasoningText = '';
if (this.streamHandler.reasoningTokens.length > 0) {
reasoningText = this.streamHandler.reasoningTokens.join('');
thinkMatch = reasoningText.match(/<think>([\s\S]*?)<\/think>/)?.[1]?.trim();
if (thinkMatch != null && thinkMatch) {
const reasoningTokens = `:::thinking\n${thinkMatch}\n:::\n`;
remainingText = reasoningText.split(/<\/think>/)?.[1]?.trim() || '';
return `${reasoningTokens}${remainingText}${this.streamHandler.tokens.join('')}`;
} else if (thinkMatch === '') {
remainingText = reasoningText.split(/<\/think>/)?.[1]?.trim() || '';
return `${remainingText}${this.streamHandler.tokens.join('')}`;
}
}
const reasoningTokens =
reasoningText.length > 0
? `:::thinking\n${reasoningText.replace('<think>', '').replace('</think>', '').trim()}\n:::\n`
: '';
return `${reasoningTokens}${this.streamHandler.tokens.join('')}`;
}
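/**
* Returns a message mapper that strips `:::thinking` blocks from message text and
* flattens content parts into plain text before building the payload.
*/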
getMessageMapMethod() {
/**
* @param {TMessage} msg
*/
return (msg) => {
if (msg.text != null && msg.text && msg.text.startsWith(':::thinking')) {
msg.text = msg.text.replace(/:::thinking.*?:::/gs, '').trim();
} else if (msg.content != null) {
msg.text = parseTextParts(msg.content, true);
delete msg.content;
}
return msg;
};
}
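/**
* Sends the chat completion request with the OpenAI SDK, handling streaming,
* Azure/OpenRouter/proxy configuration, and provider-specific parameter quirks.
*/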
async chatCompletion({ payload, onProgress, abortController = null }) {
let error = null;
let intermediateReply = [];
const errorCallback = (err) => (error = err);
try {
if (!abortController) {
abortController = new AbortController();
}
let modelOptions = { ...this.modelOptions };
if (typeof onProgress === 'function') {
modelOptions.stream = true;
}
if (this.isChatCompletion) {
modelOptions.messages = payload;
} else {
modelOptions.prompt = payload;
}
const baseURL = extractBaseURL(this.completionsUrl);
logger.debug('[OpenAIClient] chatCompletion', { baseURL, modelOptions });
const opts = {
baseURL,
};
if (this.useOpenRouter) {
opts.defaultHeaders = {
'HTTP-Referer': 'https://librechat.ai',
'X-Title': 'LibreChat',
};
}
if (this.options.headers) {
opts.defaultHeaders = { ...opts.defaultHeaders, ...this.options.headers };
}
if (this.options.defaultQuery) {
opts.defaultQuery = this.options.defaultQuery;
}
if (this.options.proxy) {
opts.httpAgent = new HttpsProxyAgent(this.options.proxy);
}
/** @type {TAzureConfig | undefined} */
const azureConfig = this.options?.req?.app?.locals?.[EModelEndpoint.azureOpenAI];
if (
(this.azure && this.isVisionModel && azureConfig) ||
(azureConfig && this.isVisionModel && this.options.endpoint === EModelEndpoint.azureOpenAI)
) {
const { modelGroupMap, groupMap } = azureConfig;
const {
azureOptions,
baseURL,
headers = {},
serverless,
} = mapModelToAzureConfig({
modelName: modelOptions.model,
modelGroupMap,
groupMap,
});
opts.defaultHeaders = resolveHeaders(headers);
this.langchainProxy = extractBaseURL(baseURL);
this.apiKey = azureOptions.azureOpenAIApiKey;
const groupName = modelGroupMap[modelOptions.model].group;
this.options.addParams = azureConfig.groupMap[groupName].addParams;
this.options.dropParams = azureConfig.groupMap[groupName].dropParams;
// Note: `forcePrompt` not re-assigned as only chat models are vision models
this.azure = !serverless && azureOptions;
this.azureEndpoint =
!serverless && genAzureChatCompletion(this.azure, modelOptions.model, this);
if (serverless === true) {
this.options.defaultQuery = azureOptions.azureOpenAIApiVersion
? { 'api-version': azureOptions.azureOpenAIApiVersion }
: undefined;
this.options.headers['api-key'] = this.apiKey;
}
}
if (this.azure || this.options.azure) {
/* Azure bug: the default `max_tokens` yields an extremely short response */
if (!modelOptions.max_tokens && modelOptions.model === 'gpt-4-vision-preview') {
modelOptions.max_tokens = 4000;
}
/* Azure does not accept `model` in the body, so we need to remove it. */
delete modelOptions.model;
opts.baseURL = this.langchainProxy
? constructAzureURL({
baseURL: this.langchainProxy,
azureOptions: this.azure,
})
: this.azureEndpoint.split(/(?<!\/)\/(chat|completion)\//)[0];
opts.defaultQuery = { 'api-version': this.azure.azureOpenAIApiVersion };
opts.defaultHeaders = { ...opts.defaultHeaders, 'api-key': this.apiKey };
}
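/* Omni (o-series) models expect `max_completion_tokens` instead of `max_tokens` and do not accept `temperature` */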
if (this.isOmni === true && modelOptions.max_tokens != null) {
modelOptions.max_completion_tokens = modelOptions.max_tokens;
delete modelOptions.max_tokens;
}
if (this.isOmni === true && modelOptions.temperature != null) {
delete modelOptions.temperature;
}
if (process.env.OPENAI_ORGANIZATION) {
opts.organization = process.env.OPENAI_ORGANIZATION;
}
let chatCompletion;
/** @type {OpenAI} */
const openai = new OpenAI({
fetch: createFetch({
directEndpoint: this.options.directEndpoint,
reverseProxyUrl: this.options.reverseProxyUrl,
}),
apiKey: this.apiKey,
...opts,
});
/* Re-orders the system message to the top of the messages payload, since these providers do not allow it anywhere else */
if (modelOptions.messages && (opts.baseURL.includes('api.mistral.ai') || this.isOllama)) {
const { messages } = modelOptions;
const systemMessageIndex = messages.findIndex((msg) => msg.role === 'system');
if (systemMessageIndex > 0) {
const [systemMessage] = messages.splice(systemMessageIndex, 1);
messages.unshift(systemMessage);
}
modelOptions.messages = messages;
}
/* If there is only one message and it's a system message, change the role to user */
if (
(opts.baseURL.includes('api.mistral.ai') || opts.baseURL.includes('api.perplexity.ai')) &&
modelOptions.messages &&
modelOptions.messages.length === 1 &&
modelOptions.messages[0]?.role === 'system'
) {
modelOptions.messages[0].role = 'user';
}
if (
(this.options.endpoint === EModelEndpoint.openAI ||
this.options.endpoint === EModelEndpoint.azureOpenAI) &&
modelOptions.stream === true
) {
modelOptions.stream_options = { include_usage: true };
}
if (this.options.addParams && typeof this.options.addParams === 'object') {
const addParams = { ...this.options.addParams };
modelOptions = {
...modelOptions,
...addParams,
};
logger.debug('[OpenAIClient] chatCompletion: added params', {
addParams: addParams,
modelOptions,
});
}
/** Note: OpenAI Web Search models do not support any known parameters besides `max_tokens` */
if (modelOptions.model && /gpt-4o.*search/.test(modelOptions.model)) {
const searchExcludeParams = [
'frequency_penalty',
'presence_penalty',
'temperature',
'top_p',
'top_k',
'stop',
'logit_bias',
'seed',
'response_format',
'n',
'logprobs',
'user',
];
this.options.dropParams = this.options.dropParams || [];
this.options.dropParams = [
...new Set([...this.options.dropParams, ...searchExcludeParams]),
];
}
if (this.options.dropParams && Array.isArray(this.options.dropParams)) {
const dropParams = [...this.options.dropParams];
dropParams.forEach((param) => {
delete modelOptions[param];
});
logger.debug('[OpenAIClient] chatCompletion: dropped params', {
dropParams: dropParams,
modelOptions,
});
}
const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
if (this.message_file_map && this.isOllama) {
const ollamaClient = new OllamaClient({ baseURL, streamRate });
return await ollamaClient.chatCompletion({
payload: modelOptions,
onProgress,
abortController,
});
}
let UnexpectedRoleError = false;
/** @type {Promise<void>} */
let streamPromise;
/** @type {(value: void | PromiseLike<void>) => void} */
let streamResolve;
if (
(!this.isOmni || /^o1-(mini|preview)/i.test(modelOptions.model)) &&
modelOptions.reasoning_effort != null
) {
delete modelOptions.reasoning_effort;
delete modelOptions.temperature;
}
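/* OpenRouter reports reasoning under `reasoning` (enabled via `include_reasoning`); other providers use `reasoning_content` */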
let reasoningKey = 'reasoning_content';
if (this.useOpenRouter) {
modelOptions.include_reasoning = true;
reasoningKey = 'reasoning';
}
if (this.useOpenRouter && modelOptions.reasoning_effort != null) {
modelOptions.reasoning = {
effort: modelOptions.reasoning_effort,
};
delete modelOptions.reasoning_effort;
}
const handlers = createStreamEventHandlers(this.options.res);
this.streamHandler = new SplitStreamHandler({
reasoningKey,
accumulate: true,
runId: this.responseMessageId,
handlers,
});
intermediateReply = this.streamHandler.tokens;
if (modelOptions.stream) {
streamPromise = new Promise((resolve) => {
streamResolve = resolve;
});
/** @type {OpenAI.OpenAI.CompletionCreateParamsStreaming} */
const params = {
...modelOptions,
stream: true,
};
const stream = await openai.beta.chat.completions
.stream(params)
.on('abort', () => {
/* Do nothing here */
})
.on('error', (err) => {
handleOpenAIErrors(err, errorCallback, 'stream');
})
.on('finalChatCompletion', async (finalChatCompletion) => {
const finalMessage = finalChatCompletion?.choices?.[0]?.message;
if (!finalMessage) {
return;
}
await streamPromise;
if (finalMessage?.role !== 'assistant') {
finalChatCompletion.choices[0].message.role = 'assistant';
}
if (typeof finalMessage.content !== 'string' || finalMessage.content.trim() === '') {
finalChatCompletion.choices[0].message.content = this.streamHandler.tokens.join('');
}
})
.on('finalMessage', (message) => {
if (message?.role !== 'assistant') {
stream.messages.push({
role: 'assistant',
content: this.streamHandler.tokens.join(''),
});
UnexpectedRoleError = true;
}
});
if (this.continued === true) {
const latestText = addSpaceIfNeeded(
this.currentMessages[this.currentMessages.length - 1]?.text ?? '',
);
this.streamHandler.handle({
choices: [
{
delta: {
content: latestText,
},
},
],
});
}
for await (const chunk of stream) {
// Add finish_reason: null if missing in any choice
if (chunk.choices) {
chunk.choices.forEach((choice) => {
if (!('finish_reason' in choice)) {
choice.finish_reason = null;
}
});
}
this.streamHandler.handle(chunk);
if (abortController.signal.aborted) {
stream.controller.abort();
break;
}
await sleep(streamRate);
}
streamResolve();
if (!UnexpectedRoleError) {
chatCompletion = await stream.finalChatCompletion().catch((err) => {
handleOpenAIErrors(err, errorCallback, 'finalChatCompletion');
});
}
}
// regular completion
else {
chatCompletion = await openai.chat.completions
.create({
...modelOptions,
})
.catch((err) => {
handleOpenAIErrors(err, errorCallback, 'create');
});
}
if (openai.abortHandler && abortController.signal) {
abortController.signal.removeEventListener('abort', openai.abortHandler);
openai.abortHandler = undefined;
}
if (!chatCompletion && UnexpectedRoleError) {
throw new Error(
'OpenAI error: Invalid final message: OpenAI expects final message to include role=assistant',
);
} else if (!chatCompletion && error) {
throw new Error(error);
} else if (!chatCompletion) {
throw new Error('Chat completion failed');
}
const { choices } = chatCompletion;
this.usage = chatCompletion.usage;
if (!Array.isArray(choices) || choices.length === 0) {
logger.warn('[OpenAIClient] Chat completion response has no choices');
return this.streamHandler.tokens.join('');
}
const { message, finish_reason } = choices[0] ?? {};
this.metadata = { finish_reason };
logger.debug('[OpenAIClient] chatCompletion response', chatCompletion);
if (!message) {
logger.warn('[OpenAIClient] Message is undefined in chatCompletion response');
return this.streamHandler.tokens.join('');
}
if (typeof message.content !== 'string' || message.content.trim() === '') {
const reply = this.streamHandler.tokens.join('');
logger.debug(
'[OpenAIClient] chatCompletion: using intermediateReply due to empty message.content',
{ intermediateReply: reply },
);
return reply;
}
if (
this.streamHandler.reasoningTokens.length > 0 &&
this.options.context !== 'title' &&
!message.content.startsWith('<think>')
) {
return this.getStreamText();
} else if (
this.streamHandler.reasoningTokens.length > 0 &&
this.options.context !== 'title' &&
message.content.startsWith('<think>')
) {
return this.getStreamText();
}
return message.content;
} catch (err) {
if (
err?.message?.includes('abort') ||
(err instanceof OpenAI.APIError && err?.message?.includes('abort'))
) {
return this.getStreamText(intermediateReply);
}
if (
err?.message?.includes(
'OpenAI error: Invalid final message: OpenAI expects final message to include role=assistant',
) ||
err?.message?.includes(
'stream ended without producing a ChatCompletionMessage with role=assistant',
) ||
err?.message?.includes('The server had an error processing your request') ||
err?.message?.includes('missing finish_reason') ||
err?.message?.includes('missing role') ||
(err instanceof OpenAI.OpenAIError && err?.message?.includes('missing finish_reason'))
) {
logger.error('[OpenAIClient] Known OpenAI error:', err);
if (this.streamHandler && this.streamHandler.reasoningTokens.length) {
return this.getStreamText();
} else if (intermediateReply.length > 0) {
return this.getStreamText(intermediateReply);
} else {
throw err;
}
} else if (err instanceof OpenAI.APIError) {
if (this.streamHandler && this.streamHandler.reasoningTokens.length) {
return this.getStreamText();
} else if (intermediateReply.length > 0) {
return this.getStreamText(intermediateReply);
} else {
throw err;
}
} else {
logger.error('[OpenAIClient.chatCompletion] Unhandled error type', err);
throw err;
}
}
}
}
module.exports = OpenAIClient;