Mirror of https://github.com/danny-avila/LibreChat.git (synced 2025-12-17 08:50:15 +01:00)
🪙 feat: Use OpenRouter Model Data for Token Cost and Context (#1703)
* feat: use openrouter data for model token cost/context
* chore: add ttl for tokenConfig and refetch models if cache expired
parent f1d974c513
commit 30e143e96d

14 changed files with 146 additions and 16 deletions
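The excerpt below covers one of the 14 changed files: the token utility that gains OpenRouter support. The TTL-and-refetch half of the commit message lands in other files of this changeset; purely as a hypothetical sketch of that caching idea (names, TTL value, and wiring are invented here, not taken from the commit):

// Hypothetical sketch; the real commit wires this through the app's cache layer.
const MODEL_TTL_MS = 30 * 60 * 1000; // illustrative TTL, not the project's value

let cached = null; // { tokenConfig, fetchedAt }

async function getCachedTokenConfig(fetchModels) {
  const now = Date.now();
  if (cached && now - cached.fetchedAt < MODEL_TTL_MS) {
    return cached.tokenConfig; // still fresh: skip the network round trip
  }
  const input = await fetchModels(); // e.g. GET https://openrouter.ai/api/v1/models
  // processModelData is introduced in the last hunk below
  cached = { tokenConfig: processModelData(input), fetchedAt: now };
  return cached.tokenConfig;
}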
@@ -1,3 +1,4 @@
+const z = require('zod');
 const { EModelEndpoint } = require('librechat-data-provider');
 
 const models = [
@@ -91,6 +92,7 @@ const maxTokensMap = {
  *
  * @param {string} modelName - The name of the model to look up.
  * @param {string} endpoint - The endpoint (default is 'openAI').
+ * @param {EndpointTokenConfig} [endpointTokenConfig] - Token Config for current endpoint to use for max tokens lookup
  * @returns {number|undefined} The maximum tokens for the given model or undefined if no match is found.
  *
  * @example
@@ -98,16 +100,21 @@ const maxTokensMap = {
  * getModelMaxTokens('gpt-4-32k-unknown'); // Returns 32767
  * getModelMaxTokens('unknown-model'); // Returns undefined
  */
-function getModelMaxTokens(modelName, endpoint = EModelEndpoint.openAI) {
+function getModelMaxTokens(modelName, endpoint = EModelEndpoint.openAI, endpointTokenConfig) {
   if (typeof modelName !== 'string') {
     return undefined;
   }
 
-  const tokensMap = maxTokensMap[endpoint];
+  /** @type {EndpointTokenConfig | Record<string, number>} */
+  const tokensMap = endpointTokenConfig ?? maxTokensMap[endpoint];
   if (!tokensMap) {
     return undefined;
   }
 
+  if (tokensMap[modelName]?.context) {
+    return tokensMap[modelName].context;
+  }
+
   if (tokensMap[modelName]) {
     return tokensMap[modelName];
   }
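For orientation (not part of the diff): after this hunk, a caller-supplied endpointTokenConfig takes precedence over the static maxTokensMap, and its entries may be objects whose context field carries the window size. A minimal sketch with invented numbers:

// Hypothetical config, in the shape processModelData (below) produces.
const endpointTokenConfig = {
  'mistralai/mixtral-8x7b-instruct': { prompt: 0.27, completion: 0.27, context: 32768 },
};

getModelMaxTokens('mistralai/mixtral-8x7b-instruct', 'custom', endpointTokenConfig); // 32768
getModelMaxTokens('gpt-4-32k-unknown'); // 32767 from the static map, per the JSDoc above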
@@ -115,7 +122,8 @@ function getModelMaxTokens(modelName, endpoint = EModelEndpoint.openAI) {
   const keys = Object.keys(tokensMap);
   for (let i = keys.length - 1; i >= 0; i--) {
     if (modelName.includes(keys[i])) {
-      return tokensMap[keys[i]];
+      const result = tokensMap[keys[i]];
+      return result?.context ?? result;
     }
   }
 
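The partial-match path gets the same unwrapping, so a substring hit on an object entry also resolves to its context; continuing the sketch above:

// The ':nitro' variant is not an exact key, but includes() matches the base id.
getModelMaxTokens('mistralai/mixtral-8x7b-instruct:nitro', 'custom', endpointTokenConfig); // 32768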
@@ -160,9 +168,55 @@ function matchModelName(modelName, endpoint = EModelEndpoint.openAI) {
   return modelName;
 }
 
+const modelSchema = z.object({
+  id: z.string(),
+  pricing: z.object({
+    prompt: z.string(),
+    completion: z.string(),
+  }),
+  context_length: z.number(),
+});
+
+const inputSchema = z.object({
+  data: z.array(modelSchema),
+});
+
+/**
+ * Processes a list of model data from an API and organizes it into structured data based on URL and specifics of rates and context.
+ * @param {{ data: Array<z.infer<typeof modelSchema>> }} input The input object containing base URL and data fetched from the API.
+ * @returns {EndpointTokenConfig} The processed model data.
+ */
+function processModelData(input) {
+  const validationResult = inputSchema.safeParse(input);
+  if (!validationResult.success) {
+    throw new Error('Invalid input data');
+  }
+  const { data } = validationResult.data;
+
+  /** @type {EndpointTokenConfig} */
+  const tokenConfig = {};
+
+  for (const model of data) {
+    const modelKey = model.id;
+    const prompt = parseFloat(model.pricing.prompt) * 1000000;
+    const completion = parseFloat(model.pricing.completion) * 1000000;
+
+    tokenConfig[modelKey] = {
+      prompt,
+      completion,
+      context: model.context_length,
+    };
+  }
+
+  return tokenConfig;
+}
+
 module.exports = {
   tiktokenModels: new Set(models),
   maxTokensMap,
+  inputSchema,
+  modelSchema,
   getModelMaxTokens,
   matchModelName,
+  processModelData,
 };
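A quick usage sketch for the new function (the entry is invented, but shaped like OpenRouter's /api/v1/models response, whose pricing fields are per-token USD strings):

const sample = {
  data: [
    {
      id: 'openai/gpt-3.5-turbo',
      pricing: { prompt: '0.000001', completion: '0.000002' },
      context_length: 16385,
    },
  ],
};

const tokenConfig = processModelData(sample);
// Per-token rates are multiplied by 1,000,000 into per-million-token rates:
// tokenConfig['openai/gpt-3.5-turbo'] => { prompt: 1, completion: 2, context: 16385 }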