🪙 feat: Use OpenRouter Model Data for Token Cost and Context (#1703)

* feat: use openrouter data for model token cost/context

* chore: add ttl for tokenConfig and refetch models if cache expired
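The first commit-message bullet points at deriving per-model cost and context data from OpenRouter. As a rough illustration (not the commit's actual code), a token config keyed by model id could be built from OpenRouter's public `/api/v1/models` response, whose entries expose `pricing` and `context_length`; the stored `{ prompt, completion, context }` shape is an assumption here.

```js
// Sketch only: build an endpointTokenConfig from OpenRouter's model list.
// Assumes OpenRouter's /api/v1/models shape: { data: [{ id, pricing, context_length }] }.
// The stored { prompt, completion, context } entry shape is an assumption.
async function fetchOpenRouterTokenConfig() {
  const res = await fetch('https://openrouter.ai/api/v1/models');
  const { data } = await res.json();

  const endpointTokenConfig = {};
  for (const model of data) {
    endpointTokenConfig[model.id] = {
      prompt: parseFloat(model.pricing.prompt), // USD per prompt token
      completion: parseFloat(model.pricing.completion), // USD per completion token
      context: model.context_length, // max context window in tokens
    };
  }
  return endpointTokenConfig;
}
```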
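The second bullet adds a TTL so a stale config triggers a refetch of the model list. A minimal sketch of that caching pattern, where the TTL value and helper names (`TOKEN_CONFIG_TTL`, `cachedAt`, `fetchOpenRouterTokenConfig`) are illustrative assumptions:

```js
// Sketch only: cache the token config with a TTL and refetch on expiry.
// The one-hour TTL is an assumed value, not taken from the commit.
const TOKEN_CONFIG_TTL = 1000 * 60 * 60;

let cached = { config: null, cachedAt: 0 };

async function getTokenConfig() {
  const expired = Date.now() - cached.cachedAt > TOKEN_CONFIG_TTL;
  if (!cached.config || expired) {
    cached = {
      config: await fetchOpenRouterTokenConfig(),
      cachedAt: Date.now(),
    };
  }
  return cached.config;
}
```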
Danny Avila 2024-02-02 00:42:11 -05:00 committed by GitHub
parent f1d974c513
commit 30e143e96d
14 changed files with 146 additions and 16 deletions

api/app/clients/BaseClient.js

@@ -428,7 +428,10 @@ class BaseClient {
       await this.saveMessageToDatabase(userMessage, saveOptions, user);
     }
 
-    if (isEnabled(process.env.CHECK_BALANCE) && supportsBalanceCheck[this.options.endpoint]) {
+    if (
+      isEnabled(process.env.CHECK_BALANCE) &&
+      supportsBalanceCheck[this.options.endpointType ?? this.options.endpoint]
+    ) {
       await checkBalance({
         req: this.options.req,
         res: this.options.res,
@@ -438,6 +441,7 @@ class BaseClient {
           amount: promptTokens,
           model: this.modelOptions.model,
           endpoint: this.options.endpoint,
+          endpointTokenConfig: this.options.endpointTokenConfig,
         },
       });
     }
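These hunks gate the balance check on `this.options.endpointType ?? this.options.endpoint`, so custom endpoints resolve to their underlying type, and pass `endpointTokenConfig` through to `checkBalance`. A hedged sketch of how per-model pricing from that config might override a default token-cost multiplier (the entry shape and `defaultRate` value are assumptions):

```js
// Sketch only: prefer per-endpoint pricing when present, else a default rate.
// The { prompt, completion } entry shape and defaultRate are assumptions.
const defaultRate = 6;

function getMultiplier({ model, tokenType, endpointTokenConfig }) {
  if (endpointTokenConfig) {
    return endpointTokenConfig[model]?.[tokenType] ?? defaultRate;
  }
  return defaultRate;
}
```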

api/app/clients/OpenAIClient.js

@@ -131,8 +131,13 @@ class OpenAIClient extends BaseClient {
     const { isChatGptModel } = this;
     this.isUnofficialChatGptModel =
       model.startsWith('text-chat') || model.startsWith('text-davinci-002-render');
+
     this.maxContextTokens =
-      getModelMaxTokens(model, this.options.endpointType ?? this.options.endpoint) ?? 4095; // 1 less than maximum
+      getModelMaxTokens(
+        model,
+        this.options.endpointType ?? this.options.endpoint,
+        this.options.endpointTokenConfig,
+      ) ?? 4095; // 1 less than maximum
 
     if (this.shouldSummarize) {
       this.maxContextTokens = Math.floor(this.maxContextTokens / 2);
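`getModelMaxTokens` now takes the token config as a third argument, letting dynamically fetched context sizes take precedence over the static per-endpoint map, with `?? 4095` as the last-resort fallback. A minimal sketch of that lookup order, assuming dynamic entries carry a `context` field and that `maxTokensMap` is the static fallback table (the values shown are illustrative):

```js
// Sketch only: resolve a model's max context tokens, preferring the
// dynamic endpointTokenConfig over a static per-endpoint map.
const maxTokensMap = {
  openAI: { 'gpt-3.5-turbo': 4095, 'gpt-4': 8187 }, // illustrative values
};

function getModelMaxTokens(model, endpoint, endpointTokenConfig) {
  const tokensMap = endpointTokenConfig ?? maxTokensMap[endpoint];
  if (!tokensMap) {
    return undefined;
  }
  // Assumed: dynamic entries are objects with a `context` field,
  // while static entries are plain numbers.
  const entry = tokensMap[model];
  if (entry == null) {
    return undefined;
  }
  return typeof entry === 'number' ? entry : entry.context;
}
```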
@@ -780,7 +785,12 @@ ${convo}
     // TODO: remove the gpt fallback and make it specific to endpoint
     const { OPENAI_SUMMARY_MODEL = 'gpt-3.5-turbo' } = process.env ?? {};
     const model = this.options.summaryModel ?? OPENAI_SUMMARY_MODEL;
-    const maxContextTokens = getModelMaxTokens(model) ?? 4095;
+    const maxContextTokens =
+      getModelMaxTokens(
+        model,
+        this.options.endpointType ?? this.options.endpoint,
+        this.options.endpointTokenConfig,
+      ) ?? 4095; // 1 less than maximum
 
     // 3 tokens for the assistant label, and 98 for the summarizer prompt (101)
     let promptBuffer = 101;
@@ -886,6 +896,7 @@ ${convo}
         model: this.modelOptions.model,
         context: 'message',
         conversationId: this.conversationId,
+        endpointTokenConfig: this.options.endpointTokenConfig,
       },
       { promptTokens, completionTokens },
     );