mirror of https://github.com/danny-avila/LibreChat.git (synced 2026-01-02 00:28:51 +01:00)
🪙 feat: Use OpenRouter Model Data for Token Cost and Context (#1703)
* feat: use openrouter data for model token cost/context
* chore: add ttl for tokenConfig and refetch models if cache expired
parent f1d974c513
commit 30e143e96d

14 changed files with 146 additions and 16 deletions
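The commit's core idea: instead of relying only on a static token-cost table, the client can derive per-model pricing and context length from OpenRouter's model list, cache it as a `tokenConfig`, and refetch the model list once the cache's TTL expires. Below is a minimal sketch of that flow, not the commit's exact code; it assumes OpenRouter's public `/api/v1/models` response shape (`id`, `context_length`, `pricing.prompt`, `pricing.completion`), and the TTL value and helper name are illustrative:

```js
// Sketch: build a per-model token config from OpenRouter's public model
// list and cache it with a TTL so pricing/context data is refetched
// once the cache expires.
const TOKEN_CONFIG_TTL = 1000 * 60 * 60; // assumed 1 hour; the actual TTL may differ

let cached = null;

async function getOpenRouterTokenConfig() {
  if (cached && Date.now() - cached.fetchedAt < TOKEN_CONFIG_TTL) {
    return cached.config;
  }
  const res = await fetch('https://openrouter.ai/api/v1/models');
  const { data } = await res.json();
  const config = {};
  for (const model of data) {
    config[model.id] = {
      prompt: Number(model.pricing.prompt), // USD per prompt token
      completion: Number(model.pricing.completion), // USD per completion token
      context: model.context_length, // max context window in tokens
    };
  }
  cached = { config, fetchedAt: Date.now() };
  return config;
}
```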
```diff
@@ -428,7 +428,10 @@ class BaseClient {
       await this.saveMessageToDatabase(userMessage, saveOptions, user);
     }
 
-    if (isEnabled(process.env.CHECK_BALANCE) && supportsBalanceCheck[this.options.endpoint]) {
+    if (
+      isEnabled(process.env.CHECK_BALANCE) &&
+      supportsBalanceCheck[this.options.endpointType ?? this.options.endpoint]
+    ) {
       await checkBalance({
         req: this.options.req,
         res: this.options.res,
```
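Note the switch from `this.options.endpoint` to `this.options.endpointType ?? this.options.endpoint`: for custom OpenAI-compatible endpoints such as OpenRouter, `endpointType` carries the underlying endpoint type, so the balance check resolves against the effective endpoint rather than the configured name. (That reading is inferred from the diff, not stated in the commit message.)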
```diff
@@ -438,6 +441,7 @@ class BaseClient {
           amount: promptTokens,
           model: this.modelOptions.model,
           endpoint: this.options.endpoint,
+          endpointTokenConfig: this.options.endpointTokenConfig,
         },
       });
     }
```
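With `endpointTokenConfig` threaded into `checkBalance`, the balance logic can price tokens from the live per-model rates instead of hard-coded defaults. A hypothetical lookup illustrating the fallback order (names and values here are illustrative, not necessarily the repo's actual multiplier logic):

```js
// Illustrative only: prefer dynamically fetched rates (e.g. OpenRouter
// pricing) and fall back to a static table when the model is unknown.
const defaultRates = { 'gpt-3.5-turbo': { prompt: 1.5, completion: 2 } }; // assumed values

function getRate({ model, tokenType, endpointTokenConfig }) {
  const dynamic = endpointTokenConfig?.[model]?.[tokenType];
  if (dynamic != null) {
    return dynamic;
  }
  return defaultRates[model]?.[tokenType] ?? 1;
}
```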
```diff
@@ -131,8 +131,13 @@ class OpenAIClient extends BaseClient {
     const { isChatGptModel } = this;
     this.isUnofficialChatGptModel =
       model.startsWith('text-chat') || model.startsWith('text-davinci-002-render');
 
     this.maxContextTokens =
-      getModelMaxTokens(model, this.options.endpointType ?? this.options.endpoint) ?? 4095; // 1 less than maximum
+      getModelMaxTokens(
+        model,
+        this.options.endpointType ?? this.options.endpoint,
+        this.options.endpointTokenConfig,
+      ) ?? 4095; // 1 less than maximum
 
     if (this.shouldSummarize) {
       this.maxContextTokens = Math.floor(this.maxContextTokens / 2);
```
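The diff establishes the new `getModelMaxTokens(model, endpoint, endpointTokenConfig)` signature; a sketch of the lookup order it implies (the static map below is a stand-in, not the repo's full table):

```js
// Sketch: a model's context window comes from the dynamic endpointTokenConfig
// when available, otherwise from a static per-endpoint map.
const maxTokensMap = { openAI: { 'gpt-3.5-turbo': 4095, 'gpt-4': 8191 } }; // stand-in values

function getModelMaxTokens(model, endpoint = 'openAI', endpointTokenConfig) {
  const dynamic = endpointTokenConfig?.[model]?.context;
  return dynamic ?? maxTokensMap[endpoint]?.[model];
}
```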
```diff
@@ -780,7 +785,12 @@ ${convo}
     // TODO: remove the gpt fallback and make it specific to endpoint
     const { OPENAI_SUMMARY_MODEL = 'gpt-3.5-turbo' } = process.env ?? {};
     const model = this.options.summaryModel ?? OPENAI_SUMMARY_MODEL;
-    const maxContextTokens = getModelMaxTokens(model) ?? 4095;
+    const maxContextTokens =
+      getModelMaxTokens(
+        model,
+        this.options.endpointType ?? this.options.endpoint,
+        this.options.endpointTokenConfig,
+      ) ?? 4095; // 1 less than maximum
 
     // 3 tokens for the assistant label, and 98 for the summarizer prompt (101)
     let promptBuffer = 101;
```
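As the comment notes, 101 tokens are reserved up front (3 for the assistant label, 98 for the summarizer prompt), so with the 4095 fallback roughly 4095 - 101 = 3994 tokens remain for the conversation being summarized.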
```diff
@@ -886,6 +896,7 @@ ${convo}
         model: this.modelOptions.model,
         context: 'message',
         conversationId: this.conversationId,
+        endpointTokenConfig: this.options.endpointTokenConfig,
       },
       { promptTokens, completionTokens },
     );
```
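Finally, the same `endpointTokenConfig` reaches token-usage recording, so spend can be computed from the fetched rates. A self-contained sketch of that calculation (the function name and shape are assumptions, not the repo's API):

```js
// Hypothetical accounting helper: multiply recorded token counts by the
// per-model rates carried in endpointTokenConfig.
function usageCost({ model, promptTokens, completionTokens, endpointTokenConfig }) {
  const rates = endpointTokenConfig?.[model] ?? { prompt: 0, completion: 0 };
  return promptTokens * rates.prompt + completionTokens * rates.completion;
}
```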