
* WIP: gemini-1.5 support
* feat: extended vertex ai support
* fix: handle possibly undefined modelName
* fix: gpt-4-turbo-preview invalid vision model
* feat: specify `fileConfig.imageOutputType` and make PNG default image conversion type
* feat: better truncation for errors including base64 strings
* fix: gemini inlineData formatting
* feat: RAG augmented prompt for gemini-1.5
* feat: gemini-1.5 rates and token window
* chore: adjust tokens, update docs, update vision models
* chore: add back `ChatGoogleVertexAI` for chat models via vertex ai
* refactor: ask/edit controllers to not use `unfinished` field for google endpoint
* chore: remove comment
* chore(ci): fix AppService test
* chore: remove comment
* refactor(GoogleSearch): use `GOOGLE_SEARCH_API_KEY` instead, issue warning for old variable
* chore: bump data-provider to 0.5.4
* chore: update docs
* fix: condition for gemini-1.5 using generative ai lib
* chore: update docs
* ci: add additional AppService test for `imageOutputType`
* refactor: optimize new config value `imageOutputType`
* chore: bump CONFIG_VERSION
* fix(assistants): avatar upload
const { matchModelName } = require('../utils');

const defaultRate = 6;
/**
 * Mapping of model token sizes to their respective multipliers for prompt and completion.
 * The rates are in USD per 1M tokens.
 * @type {Object.<string, {prompt: number, completion: number}>}
 */
const tokenValues = {
  '8k': { prompt: 30, completion: 60 },
  '32k': { prompt: 60, completion: 120 },
  '4k': { prompt: 1.5, completion: 2 },
  '16k': { prompt: 3, completion: 4 },
  'gpt-3.5-turbo-1106': { prompt: 1, completion: 2 },
  'gpt-4-1106': { prompt: 10, completion: 30 },
  'gpt-3.5-turbo-0125': { prompt: 0.5, completion: 1.5 },
  'claude-3-opus': { prompt: 15, completion: 75 },
  'claude-3-sonnet': { prompt: 3, completion: 15 },
  'claude-3-haiku': { prompt: 0.25, completion: 1.25 },
  'claude-2.1': { prompt: 8, completion: 24 },
  'claude-2': { prompt: 8, completion: 24 },
  'claude-': { prompt: 0.8, completion: 2.4 },
  'command-r-plus': { prompt: 3, completion: 15 },
  'command-r': { prompt: 0.5, completion: 1.5 },
  /* Cohere doesn't publish rates for the older command models,
  so this value is from https://artificialanalysis.ai/models/command-light/providers */
  command: { prompt: 0.38, completion: 0.38 },
  // 'gemini-1.5': { prompt: 7, completion: 21 }, // May 2nd, 2024 pricing
  // 'gemini': { prompt: 0.5, completion: 1.5 }, // May 2nd, 2024 pricing
  'gemini-1.5': { prompt: 0, completion: 0 }, // currently free
  gemini: { prompt: 0, completion: 0 }, // currently free
};
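
/*
 * Usage sketch (illustrative, not part of the original module): estimating the
 * USD cost of one request from these rates. Rates are USD per 1M tokens, so
 * the weighted token sum is divided by 1e6; the token counts are hypothetical.
 */
if (require.main === module) {
  const rates = tokenValues['gpt-4-1106'];
  const promptTokens = 1200; // hypothetical prompt size
  const completionTokens = 300; // hypothetical completion size
  const costUSD = (promptTokens * rates.prompt + completionTokens * rates.completion) / 1e6;
  console.log(costUSD); // (1200 * 10 + 300 * 30) / 1e6 === 0.021
}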

/**
 * Retrieves the key associated with a given model name.
 *
 * @param {string} model - The model name to match.
 * @param {string} endpoint - The endpoint name to match.
 * @returns {string|undefined} The key corresponding to the model name, or undefined if no match is found.
 */
const getValueKey = (model, endpoint) => {
  const modelName = matchModelName(model, endpoint);
  if (!modelName) {
    return undefined;
  }

  if (modelName.includes('gpt-3.5-turbo-16k')) {
    return '16k';
  } else if (modelName.includes('gpt-3.5-turbo-0125')) {
    return 'gpt-3.5-turbo-0125';
  } else if (modelName.includes('gpt-3.5-turbo-1106')) {
    return 'gpt-3.5-turbo-1106';
  } else if (modelName.includes('gpt-3.5')) {
    return '4k';
  } else if (modelName.includes('gpt-4-1106')) {
    return 'gpt-4-1106';
  } else if (modelName.includes('gpt-4-0125')) {
    // gpt-4-0125 and gpt-4-turbo share gpt-4-1106 pricing
    return 'gpt-4-1106';
  } else if (modelName.includes('gpt-4-turbo')) {
    return 'gpt-4-1106';
  } else if (modelName.includes('gpt-4-32k')) {
    return '32k';
  } else if (modelName.includes('gpt-4')) {
    return '8k';
  } else if (tokenValues[modelName]) {
    return modelName;
  }

  return undefined;
};
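
/*
 * Illustrative sketch of how raw model names resolve to rate keys. Exact
 * results depend on `matchModelName` (defined in ../utils); these comments
 * assume it returns unmatched names unchanged.
 */
if (require.main === module) {
  console.log(getValueKey('gpt-4-0125-preview')); // 'gpt-4-1106' (0125/turbo share 1106 rates)
  console.log(getValueKey('gpt-3.5-turbo-16k-0613')); // '16k'
  console.log(getValueKey('gpt-3.5-turbo-0301')); // '4k' (generic gpt-3.5 fallback)
  console.log(getValueKey('command-r-plus')); // 'command-r-plus' (direct tokenValues hit)
}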

/**
 * Retrieves the multiplier for a given value key and token type. If no value key is provided,
 * it attempts to derive it from the model name.
 *
 * @param {Object} params - The parameters for the function.
 * @param {string} [params.valueKey] - The key corresponding to the model name.
 * @param {string} [params.tokenType] - The type of token (e.g., 'prompt' or 'completion').
 * @param {string} [params.model] - The model name to derive the value key from if not provided.
 * @param {string} [params.endpoint] - The endpoint name to derive the value key from if not provided.
 * @param {EndpointTokenConfig} [params.endpointTokenConfig] - The token configuration for the endpoint.
 * @returns {number} The multiplier for the given parameters, or a default value if not found.
 */
const getMultiplier = ({ valueKey, tokenType, model, endpoint, endpointTokenConfig }) => {
  if (endpointTokenConfig) {
    return endpointTokenConfig?.[model]?.[tokenType] ?? defaultRate;
  }

  if (valueKey && tokenType) {
    return tokenValues[valueKey]?.[tokenType] ?? defaultRate;
  }

  if (!tokenType || !model) {
    return 1;
  }

  valueKey = getValueKey(model, endpoint);
  if (!valueKey) {
    return defaultRate;
  }

  // If we got this far and tokenValues[valueKey][tokenType] is still undefined,
  // fall back to the default multiplier.
  return tokenValues[valueKey]?.[tokenType] ?? defaultRate;
};
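
/*
 * Usage sketch (illustrative): typical lookups and fallbacks. The unlisted
 * model name is hypothetical, and the last result assumes `matchModelName`
 * returns unmatched names unchanged.
 */
if (require.main === module) {
  console.log(getMultiplier({ model: 'gpt-3.5-turbo-0125', tokenType: 'prompt' })); // 0.5
  console.log(getMultiplier({ valueKey: '8k', tokenType: 'completion' })); // 60
  console.log(getMultiplier({})); // 1 (no tokenType/model: neutral multiplier)
  console.log(getMultiplier({ model: 'totally-unknown-model', tokenType: 'prompt' })); // 6 (defaultRate)
}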

module.exports = { tokenValues, getValueKey, getMultiplier, defaultRate };