LibreChat/api/server/utils/handleText.js
Danny Avila 8bdc808074
refactor: Optimize & Standardize Tokenizer Usage (#10777)
* refactor: Token Limit Processing with Enhanced Efficiency

- Added a new test suite for `processTextWithTokenLimit`, covering text under, at, and over the token limit.
- Refactored the `processTextWithTokenLimit` function to use a ratio-based estimation method, significantly reducing the number of token-counting calls compared to the previous binary search approach (see the sketch below).
- Improved handling of edge cases and variable token density, ensuring accurate truncation and performance across diverse text inputs.
- Included direct comparisons with the old implementation to validate correctness and efficiency improvements.
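
A minimal sketch of the ratio-based idea, for context only (the function and variable names are illustrative; the actual `processTextWithTokenLimit` implementation differs in its details):

```js
/**
 * Sketch: truncate `text` to at most `tokenLimit` tokens using a
 * ratio-based character estimate instead of a binary search over prefixes.
 * `countTokens` may be sync or async.
 */
async function truncateByRatio(text, tokenLimit, countTokens) {
  const total = await countTokens(text);
  if (total <= tokenLimit) {
    return text;
  }
  // Estimate how many characters roughly correspond to `tokenLimit` tokens.
  const charsPerToken = text.length / total;
  let cut = Math.floor(tokenLimit * charsPerToken);
  let truncated = text.slice(0, cut);
  // Re-count and shrink proportionally only if the estimate overshoots.
  let count = await countTokens(truncated);
  while (count > tokenLimit && truncated.length > 0) {
    cut = Math.floor(cut * (tokenLimit / count));
    truncated = text.slice(0, cut);
    count = await countTokens(truncated);
  }
  return truncated;
}
```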

* refactor: Remove Tokenizer Route and Related References

- Deleted the tokenizer route from the server and removed its references from the routes index and server files, streamlining the API structure.
- This change simplifies the routing configuration by eliminating unused endpoints.

* refactor: Migrate countTokens Utility to API Module

- Removed the local countTokens utility and integrated it into the @librechat/api module for centralized access.
- Updated various files to reference the new countTokens import from the API module, ensuring consistent usage across the application (import example below).
- Cleaned up unused references and imports related to the previous countTokens implementation.
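
A hedged usage sketch of the centralized import; the exact signature and the model argument are assumptions for illustration, not taken from this change:

```js
const { countTokens } = require('@librechat/api');

// Assumed signature: countTokens(text, model?) resolving to a token count.
async function logTokenCount(text) {
  const tokens = await countTokens(text, 'gpt-4o-mini');
  console.log(`${tokens} tokens`);
}
```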

* refactor: Centralize escapeRegExp Utility in API Module

- Moved the escapeRegExp function from local utility files to the @librechat/api module for consistent usage across the application (usage example below).
- Updated imports in various files to reference the new centralized escapeRegExp function, ensuring cleaner code and reducing redundancy.
- Removed duplicate implementations of escapeRegExp from multiple files, streamlining the codebase.
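
A brief sketch of how callers typically use the centralized helper (import path per the bullets above; the wrapper function here is purely illustrative):

```js
const { escapeRegExp } = require('@librechat/api');

// Build a safe matcher from arbitrary user input; without escaping,
// input such as 'price (USD)' would produce an invalid or unintended RegExp.
function buildMatcher(userInput) {
  return new RegExp(escapeRegExp(userInput), 'i');
}
```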

* refactor: Enhance Token Counting Flexibility in Text Processing

- Updated the `processTextWithTokenLimit` function to accept both synchronous and asynchronous token-counting functions (sketched below).
- Introduced a new `TokenCountFn` type to define the token counting function signature.
- Added comprehensive tests to validate the behavior of `processTextWithTokenLimit` with both sync and async token counting functions, ensuring consistent results.
- Implemented a wrapper that tracks `countTokens` call counts, used in the tests to verify that the new implementation makes fewer calls.
- Enhanced existing tests to compare the performance of the new implementation against the old one, demonstrating significant improvements in efficiency.
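
Supporting both kinds of counters usually needs no branching, because `await` passes non-promise values through unchanged; a minimal sketch (the exact `TokenCountFn` shape and the helper name are illustrative):

```js
/**
 * @typedef {(text: string) => number | Promise<number>} TokenCountFn
 */

/** @param {TokenCountFn} countTokens */
async function isOverLimit(text, limit, countTokens) {
  // `await` works identically for plain numbers and promises of numbers.
  const count = await countTokens(text);
  return count > limit;
}
```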

* chore: Add and Document Truncation Safety Buffer in Token Processing

- Added a safety buffer multiplier to the character position estimates during text truncation to prevent overshooting token limits (see the sketch below).
- Updated the `processTextWithTokenLimit` function to use the new `TRUNCATION_SAFETY_BUFFER` constant, improving the accuracy of token limit processing.
- Improved documentation to clarify the rationale behind the buffer and its impact on performance and efficiency in token counting.
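
A sketch of how such a buffer is typically applied to the estimated cut position; the constant's value here is illustrative, not the one used in the codebase:

```js
// Illustrative value; the real TRUNCATION_SAFETY_BUFFER may differ.
const TRUNCATION_SAFETY_BUFFER = 0.95;

// Slightly undershoot the estimated cut so a single re-count rarely exceeds the limit.
function estimateCutPosition(textLength, totalTokens, tokenLimit) {
  const charsPerToken = textLength / totalTokens;
  return Math.floor(tokenLimit * charsPerToken * TRUNCATION_SAFETY_BUFFER);
}
```
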
2025-12-02 12:22:04 -05:00


const {
Capabilities,
EModelEndpoint,
isAgentsEndpoint,
isAssistantsEndpoint,
defaultRetrievalModels,
defaultAssistantsVersion,
defaultAgentCapabilities,
} = require('librechat-data-provider');
const { sendEvent } = require('@librechat/api');
const partialRight = require('lodash/partialRight');
const addSpaceIfNeeded = (text) => (text.length > 0 && !text.endsWith(' ') ? text + ' ' : text);
const base = { message: true, initial: true };
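/**
 * Creates handlers for streaming partial response text to the client via server-sent events.
 *
 * @param {{ generation?: string, onProgress?: (payload: object) => void }} [options] - Optional
 * pre-existing generation text to seed the stream and a callback invoked with every payload sent.
 * @returns {{
 *   onProgress: (opts: { res: object }) => (chunk: string) => void,
 *   getPartialText: () => string,
 *   sendIntermediateMessage: (res: object, payload: object, extraTokens?: string) => void,
 * }}
 */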
const createOnProgress = (
{ generation = '', onProgress: _onProgress } = {
generation: '',
onProgress: null,
},
) => {
let i = 0;
let tokens = addSpaceIfNeeded(generation);
const basePayload = Object.assign({}, base, { text: tokens || '' });
const progressCallback = (chunk, { res, ...rest }) => {
basePayload.text = basePayload.text + chunk;
const payload = Object.assign({}, basePayload, rest);
sendEvent(res, payload);
if (_onProgress) {
_onProgress(payload);
}
if (i === 0) {
basePayload.initial = false;
}
i++;
};
const sendIntermediateMessage = (res, payload, extraTokens = '') => {
basePayload.text = basePayload.text + extraTokens;
const message = Object.assign({}, basePayload, payload);
sendEvent(res, message);
if (i === 0) {
basePayload.initial = false;
}
i++;
};
const onProgress = (opts) => {
return partialRight(progressCallback, opts);
};
const getPartialText = () => {
return basePayload.text;
};
return { onProgress, getPartialText, sendIntermediateMessage };
};
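/**
 * Extracts and returns the `text` field from a response object.
 *
 * @param {{ text: string }} response
 * @returns {Promise<string>}
 */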
const handleText = async (response) => {
let { text } = response;
response.text = text;
return text;
};
const isObject = (item) => item && typeof item === 'object' && !Array.isArray(item);
const getString = (input) => (isObject(input) ? JSON.stringify(input) : input);
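/**
 * Formats intermediate agent steps into `Input:`/`Output:` blocks separated by `---`,
 * skipping steps whose input is 'N/A' or whose observation is empty.
 *
 * @param {Array<{ action: { toolInput: string | object }, observation?: string }>} steps
 * @returns {string}
 */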
function formatSteps(steps) {
let output = '';
for (let i = 0; i < steps.length; i++) {
const step = steps[i];
const actionInput = getString(step.action.toolInput);
const observation = step.observation;
if (actionInput === 'N/A' || observation?.trim()?.length === 0) {
continue;
}
output += `Input: ${actionInput}\nOutput: ${getString(observation)}`;
if (steps.length > 1 && i !== steps.length - 1) {
output += '\n---\n';
}
}
return output;
}
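/**
 * Formats an agent action into a plugin/input/thought structure with a printable `inputStr`;
 * self-reflection and 'N/A' plugins are rendered as a thought-only block.
 *
 * @param {{ tool: string, toolInput: string | object, log: string }} action
 * @returns {{ plugin: string, input: string, thought: string, inputStr: string }}
 */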
function formatAction(action) {
const formattedAction = {
plugin: action.tool,
input: getString(action.toolInput),
thought: action.log.includes('Thought: ')
? action.log.split('\n')[0].replace('Thought: ', '')
: action.log.split('\n')[0],
};
formattedAction.thought = getString(formattedAction.thought);
if (action.tool.toLowerCase() === 'self-reflection' || formattedAction.plugin === 'N/A') {
formattedAction.inputStr = `{\n\tthought: ${formattedAction.input}${
!formattedAction.thought.includes(formattedAction.input)
? ' - ' + formattedAction.thought
: ''
}\n}`;
formattedAction.inputStr = formattedAction.inputStr.replace('N/A - ', '');
} else {
const hasThought = formattedAction.thought.length > 0;
const thought = hasThought ? `\n\tthought: ${formattedAction.thought}` : '';
formattedAction.inputStr = `{\n\tplugin: ${formattedAction.plugin}\n\tinput: ${formattedAction.input}\n${thought}}`;
}
return formattedAction;
}
/**
* Checks if the provided value is 'user_provided'.
*
* @param {string} value - The value to check.
* @returns {boolean} - Returns true if the value is 'user_provided', otherwise false.
*/
const isUserProvided = (value) => value === 'user_provided';
/**
* Generate the configuration for a given key and base URL.
* @param {string} key
* @param {string} [baseURL]
* @param {string} [endpoint]
* @returns {boolean | { userProvide: boolean, userProvideURL?: boolean, retrievalModels?: string[], capabilities?: string[], version?: string | number }}
*/
function generateConfig(key, baseURL, endpoint) {
if (!key) {
return false;
}
/** @type {{ userProvide: boolean, userProvideURL?: boolean }} */
const config = { userProvide: isUserProvided(key) };
if (baseURL) {
config.userProvideURL = isUserProvided(baseURL);
}
const assistants = isAssistantsEndpoint(endpoint);
const agents = isAgentsEndpoint(endpoint);
if (assistants) {
config.retrievalModels = defaultRetrievalModels;
config.capabilities = [
Capabilities.code_interpreter,
Capabilities.image_vision,
Capabilities.retrieval,
Capabilities.actions,
Capabilities.tools,
];
}
if (agents) {
config.capabilities = defaultAgentCapabilities;
}
if (assistants && endpoint === EModelEndpoint.azureAssistants) {
config.version = defaultAssistantsVersion.azureAssistants;
} else if (assistants) {
config.version = defaultAssistantsVersion.assistants;
}
return config;
}
module.exports = {
handleText,
formatSteps,
formatAction,
isUserProvided,
generateConfig,
addSpaceIfNeeded,
createOnProgress,
};