mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-03-09 01:22:36 +01:00
* 🧹 chore: resolve imports due to rebase
* chore: Update model mocks in unit tests for consistency
- Consolidated model mock implementations across various test files to streamline setup and reduce redundancy.
- Removed duplicate mock definitions for `getMultiplier` and `getCacheMultiplier`, ensuring a unified approach in `recordCollectedUsage.spec.js`, `openai.spec.js`, `responses.unit.spec.js`, and `abortMiddleware.spec.js`.
- Enhanced clarity and maintainability of test files by aligning mock structures with the latest model updates.
* fix: Safeguard token credit checks in transaction tests
- Updated assertions in `transaction.spec.ts` to handle potential null values for `updatedBalance` by using optional chaining.
- Enhanced robustness of tests related to token credit calculations, ensuring they correctly account for scenarios where the balance may not be found.
* chore: extend transaction methods with bulk insert functionality
- Introduced `bulkInsertTransactions` method in `transaction.ts` to facilitate batch insertion of transaction documents.
- Updated test file `transactions.bulk-parity.spec.ts` to utilize new pricing function assignments and handle potential null values in calculations, improving test robustness.
- Refactored pricing function initialization for clarity and consistency.
* refactor: Enhance type definitions and introduce new utility functions for model matching
- Added `findMatchingPattern` and `matchModelName` utility functions to improve model name matching logic in transaction methods.
- Updated type definitions for `findMatchingPattern` to accept a more specific tokensMap structure, enhancing type safety.
- Refactored `dbMethods` initialization in `transactions.bulk-parity.spec.ts` to include the new utility functions, improving test clarity and functionality.
* refactor: Update database method imports and enhance transaction handling
- Refactored `abortMiddleware.js` to utilize centralized database methods for message handling and conversation retrieval, improving code consistency.
- Enhanced `bulkInsertTransactions` in `transaction.ts` to handle empty document arrays gracefully and added error logging for better debugging.
- Updated type definitions in `transactions.ts` to enforce stricter typing for token types, enhancing type safety across transaction methods.
- Improved test setup in `transactions.bulk-parity.spec.ts` by refining pricing function assignments and ensuring robust handling of potential null values.
* refactor: Update database method references and improve transaction multiplier handling
- Refactored `client.js` to update database method references for `bulkInsertTransactions` and `updateBalance`, ensuring consistency in method usage.
- Enhanced transaction multiplier calculations in `transaction.spec.ts` to provide fallback values for write and read multipliers, improving robustness in cost calculations across structured token spending tests.
274 lines
8.1 KiB
JavaScript
274 lines
8.1 KiB
JavaScript
const { logger } = require('@librechat/data-schemas');
|
|
const { isAssistantsEndpoint, ErrorTypes } = require('librechat-data-provider');
|
|
const {
|
|
isEnabled,
|
|
sendEvent,
|
|
countTokens,
|
|
GenerationJobManager,
|
|
recordCollectedUsage,
|
|
sanitizeMessageForTransmit,
|
|
} = require('@librechat/api');
|
|
const { truncateText, smartTruncateText } = require('~/app/clients/prompts');
|
|
const clearPendingReq = require('~/cache/clearPendingReq');
|
|
const { sendError } = require('~/server/middleware/error');
|
|
const { abortRun } = require('./abortRun');
|
|
const db = require('~/models');
|
|
|
|
/**
 * Record token spend for every model present in the collected usage.
 * Delegates to `recordCollectedUsage` using the `'abort'` context, which
 * covers both sequential and parallel agent execution.
 *
 * IMPORTANT: once recording has completed, the `collectedUsage` array is
 * emptied in place. The array is shared by reference with
 * AgentClient.collectedUsage, so emptying it here keeps the AgentClient
 * finally block from spending the same tokens a second time.
 *
 * @param {Object} params
 * @param {string} params.userId - User ID
 * @param {string} params.conversationId - Conversation ID
 * @param {Array<Object>} params.collectedUsage - Usage metadata from all models
 * @param {string} [params.fallbackModel] - Fallback model name if not in usage
 * @param {string} [params.messageId] - The response message ID for transaction correlation
 * @returns {Promise<void>}
 */
async function spendCollectedUsage({
  userId,
  conversationId,
  collectedUsage,
  fallbackModel,
  messageId,
}) {
  const hasUsage = Boolean(collectedUsage) && collectedUsage.length !== 0;
  if (!hasUsage) {
    return;
  }

  // Database-backed spending primitives handed to the shared recorder.
  const spendingDeps = {
    spendTokens: db.spendTokens,
    spendStructuredTokens: db.spendStructuredTokens,
    pricing: { getMultiplier: db.getMultiplier, getCacheMultiplier: db.getCacheMultiplier },
    bulkWriteOps: { insertMany: db.bulkInsertTransactions, updateBalance: db.updateBalance },
  };

  // Describes what is being recorded; `model` is only a fallback name.
  const usageParams = {
    user: userId,
    conversationId,
    collectedUsage,
    context: 'abort',
    messageId,
    model: fallbackModel,
  };

  await recordCollectedUsage(spendingDeps, usageParams);

  // Empty the shared array in place (not a reassignment) so every holder of the
  // reference, including AgentClient.collectedUsage, observes an empty array and
  // a later recordCollectedUsage() call returns early instead of double-spending.
  collectedUsage.length = 0;
}
|
|
|
|
/**
 * Abort an active message generation and finalize the partial response.
 *
 * Assistants endpoints are handed off to `abortRun`. Every other request is
 * aborted through GenerationJobManager, keyed by conversation ID (the first
 * `:`-separated segment of `abortKey`, falling back to the user ID).
 *
 * On a successful abort this spends tokens, saves the partial response
 * message, and replies with a final event: via `sendEvent` when headers were
 * already sent, otherwise as a JSON body.
 *
 * @param {ServerRequest} req
 * @param {ServerResponse} res
 * @returns {Promise<unknown>} Whatever the chosen responder returns, or undefined.
 */
async function abortMessage(req, res) {
  const { abortKey, endpoint } = req.body;

  // Assistants runs have their own abort path.
  if (isAssistantsEndpoint(endpoint)) {
    return await abortRun(req, res);
  }

  const conversationId = abortKey?.split(':')?.[0] ?? req.user.id;
  const userId = req.user.id;

  // streamId === conversationId, so the job can be aborted by conversation ID.
  const abortResult = await GenerationJobManager.abortJob(conversationId);

  if (!abortResult.success) {
    // Nothing to abort; only reply if a response has not already been started.
    return res.headersSent ? undefined : res.status(204).send({ message: 'Request not found' });
  }

  const { jobData, content, text, collectedUsage } = abortResult;

  const completionTokens = await countTokens(text);
  const promptTokens = jobData?.promptTokens ?? 0;

  // The partial response as it will be saved and returned to the client.
  const responseMessage = {
    messageId: jobData?.responseMessageId,
    parentMessageId: jobData?.userMessage?.messageId,
    conversationId: jobData?.conversationId,
    content,
    text,
    sender: jobData?.sender ?? 'AI',
    finish_reason: 'incomplete',
    endpoint: jobData?.endpoint,
    iconURL: jobData?.iconURL,
    model: jobData?.model,
    unfinished: false,
    error: false,
    isCreatedByUser: false,
    tokenCount: completionTokens,
  };

  const hasCollectedUsage = Boolean(collectedUsage) && collectedUsage.length > 0;
  if (!hasCollectedUsage) {
    // Fallback: no collected usage, so bill the primary model from text-based token counts.
    const transaction = { ...responseMessage, context: 'incomplete', user: userId };
    await db.spendTokens(transaction, { promptTokens, completionTokens });
  } else {
    // Spend tokens for ALL models from collectedUsage (handles parallel agents/addedConvo).
    await spendCollectedUsage({
      userId,
      conversationId: jobData?.conversationId,
      collectedUsage,
      fallbackModel: jobData?.model,
      messageId: jobData?.responseMessageId,
    });
  }

  const saveContext = {
    userId: req?.user?.id,
    isTemporary: req?.body?.isTemporary,
    interfaceConfig: req?.config?.interfaceConfig,
  };
  await db.saveMessage(
    saveContext,
    { ...responseMessage, user: userId },
    { context: 'api/server/middleware/abortMiddleware.js' },
  );

  // The conversation is looked up to supply the title of the final event.
  const conversation = await db.getConvo(userId, conversationId);

  // Found conversation: its title, or null when it has none. No conversation: 'New Chat'.
  const title = conversation ? conversation.title || null : 'New Chat';

  const userMessage = jobData?.userMessage;
  let requestMessage = null;
  if (userMessage) {
    requestMessage = sanitizeMessageForTransmit({
      messageId: userMessage.messageId,
      parentMessageId: userMessage.parentMessageId,
      conversationId: userMessage.conversationId,
      text: userMessage.text,
      isCreatedByUser: true,
    });
  }

  const finalEvent = {
    title,
    final: true,
    conversation,
    requestMessage,
    responseMessage,
  };

  logger.debug(
    `[abortMessage] ID: ${userId} | ${req.user.email} | Aborted request: ${conversationId}`,
  );

  // A response already in progress gets the final event; otherwise reply with plain JSON.
  if (res.headersSent) {
    return sendEvent(res, finalEvent);
  }

  res.setHeader('Content-Type', 'application/json');
  res.send(JSON.stringify(finalEvent));
}
|
|
|
|
/**
 * Build the Express handler for abort requests.
 *
 * The handler clears the user's pending-request marker when
 * LIMIT_CONCURRENT_MESSAGES is enabled, then delegates to `abortMessage`.
 * Failures are logged rather than rethrown. If no response has been started
 * yet, a 500 is sent so the client is not left waiting on a request that
 * will never be answered.
 *
 * @returns {(req: ServerRequest, res: ServerResponse) => Promise<unknown>}
 */
const handleAbort = function () {
  return async function (req, res) {
    try {
      if (isEnabled(process.env.LIMIT_CONCURRENT_MESSAGES)) {
        await clearPendingReq({ userId: req.user.id });
      }
      return await abortMessage(req, res);
    } catch (err) {
      logger.error('[abortMessage] handleAbort error', err);
      // Only reply when nothing has been written yet; a response that is already
      // in progress cannot have its status changed.
      if (!res.headersSent) {
        return res.status(500).send({ message: 'Error aborting request' });
      }
    }
  };
};
|
|
|
|
/**
 * Handle abort errors during generation.
 *
 * Logs the error, derives the error text to show the user, and then either
 * finalizes the partial response through `abortMessage` (when more than five
 * characters of partial text exist) or replies through `sendError`.
 *
 * `error` may be any thrown value, including null/undefined, and `data` may be
 * missing; both are read defensively so this handler does not itself throw
 * while reporting a failure.
 *
 * @param {ServerResponse} res
 * @param {ServerRequest} req
 * @param {Error | unknown} error
 * @param {Partial<TMessage> & { partialText?: string }} data
 * @returns {Promise<void>}
 */
const handleAbortError = async (res, req, error, data) => {
  if (error?.message?.includes('base64')) {
    // Truncate the message and stack before logging them for base64 errors.
    logger.error('[handleAbortError] Error in base64 encoding', {
      ...error,
      stack: smartTruncateText(error?.stack, 1000),
      message: truncateText(error.message, 350),
    });
  } else {
    logger.error('[handleAbortError] AI response error; aborting request:', error);
  }

  // Tolerate a missing `data` argument instead of throwing on destructuring.
  const { sender, conversationId, messageId, parentMessageId, userMessageId, partialText } =
    data ?? {};

  // Optional chaining: `error` is not guaranteed to be an Error (or even defined).
  if (error?.stack?.includes('google')) {
    logger.warn(
      `AI Response error for conversation ${conversationId} likely caused by Google censor/filter`,
    );
  }

  // Messages that already carry a `"type"` payload are passed through as-is;
  // anything else is replaced with a generic message.
  let errorText = error?.message?.includes('"type"')
    ? error.message
    : 'An error occurred while processing your request. Please contact the Admin.';

  if (error?.type === ErrorTypes.INVALID_REQUEST) {
    errorText = `{"type":"${ErrorTypes.INVALID_REQUEST}"}`;
  }

  if (error?.message?.includes("does not support 'system'")) {
    errorText = `{"type":"${ErrorTypes.NO_SYSTEM_MESSAGES}"}`;
  }

  /**
   * Reply through `sendError`. When partial text is supplied, it replaces the
   * error text and the message is flagged as unfinished rather than errored.
   *
   * @param {string} [partialText]
   * @returns {Promise<void>}
   */
  const respondWithError = async (partialText) => {
    const endpointOption = req.body?.endpointOption;
    let options = {
      sender,
      messageId,
      conversationId,
      parentMessageId,
      text: errorText,
      user: req.user.id,
      spec: endpointOption?.spec,
      iconURL: endpointOption?.iconURL,
      modelLabel: endpointOption?.modelLabel,
      // Only save when a user message ID was provided (neither null nor undefined).
      shouldSaveMessage: userMessageId != null,
      model: endpointOption?.modelOptions?.model || req.body?.model,
    };

    if (req.body?.agent_id) {
      options.agent_id = req.body.agent_id;
    }

    if (partialText) {
      options = {
        ...options,
        error: false,
        unfinished: true,
        text: partialText,
      };
    }

    await sendError(req, res, options);
  };

  if (partialText && partialText.length > 5) {
    try {
      return await abortMessage(req, res);
    } catch (err) {
      logger.error('[handleAbortError] error while trying to abort message', err);
      return respondWithError(partialText);
    }
  } else {
    return respondWithError();
  }
};
|
|
|
|
// Public surface of this module: the abort route handler factory, the
// generation error handler, and the usage-spending helper.
module.exports = { handleAbort, handleAbortError, spendCollectedUsage };
|