🪙 feat: Use OpenRouter Model Data for Token Cost and Context (#1703)

* feat: use openrouter data for model token cost/context

* chore: add ttl for tokenConfig and refetch models if cache expired
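The first commit-message bullet points at deriving per-model cost and context data from OpenRouter. As a rough illustration (not the commit's actual code), a token config keyed by model id could be built from OpenRouter's public `/api/v1/models` response, whose entries expose `pricing` and `context_length`; the stored `{ prompt, completion, context }` shape is an assumption here.

```js
// Sketch only: build an endpointTokenConfig from OpenRouter's model list.
// Assumes OpenRouter's /api/v1/models shape: { data: [{ id, pricing, context_length }] }.
// The stored { prompt, completion, context } entry shape is an assumption.
async function fetchOpenRouterTokenConfig() {
  const res = await fetch('https://openrouter.ai/api/v1/models');
  const { data } = await res.json();

  const endpointTokenConfig = {};
  for (const model of data) {
    endpointTokenConfig[model.id] = {
      prompt: parseFloat(model.pricing.prompt), // USD per prompt token
      completion: parseFloat(model.pricing.completion), // USD per completion token
      context: model.context_length, // max context window in tokens
    };
  }
  return endpointTokenConfig;
}
```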
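The second bullet adds a TTL so a stale config triggers a refetch of the model list. A minimal sketch of that caching pattern, where the TTL value and helper names (`TOKEN_CONFIG_TTL`, `cachedAt`, `fetchOpenRouterTokenConfig`) are illustrative assumptions:

```js
// Sketch only: cache the token config with a TTL and refetch on expiry.
// The one-hour TTL is an assumed value, not taken from the commit.
const TOKEN_CONFIG_TTL = 1000 * 60 * 60;

let cached = { config: null, cachedAt: 0 };

async function getTokenConfig() {
  const expired = Date.now() - cached.cachedAt > TOKEN_CONFIG_TTL;
  if (!cached.config || expired) {
    cached = {
      config: await fetchOpenRouterTokenConfig(),
      cachedAt: Date.now(),
    };
  }
  return cached.config;
}
```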
Danny Avila 2024-02-02 00:42:11 -05:00 committed by GitHub
parent f1d974c513
commit 30e143e96d
14 changed files with 146 additions and 16 deletions

api/app/clients/BaseClient.js

@@ -428,7 +428,10 @@ class BaseClient {
       await this.saveMessageToDatabase(userMessage, saveOptions, user);
     }
 
-    if (isEnabled(process.env.CHECK_BALANCE) && supportsBalanceCheck[this.options.endpoint]) {
+    if (
+      isEnabled(process.env.CHECK_BALANCE) &&
+      supportsBalanceCheck[this.options.endpointType ?? this.options.endpoint]
+    ) {
       await checkBalance({
         req: this.options.req,
         res: this.options.res,
@@ -438,6 +441,7 @@ class BaseClient {
           amount: promptTokens,
           model: this.modelOptions.model,
           endpoint: this.options.endpoint,
+          endpointTokenConfig: this.options.endpointTokenConfig,
         },
       });
     }
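These hunks gate the balance check on `this.options.endpointType ?? this.options.endpoint`, so custom endpoints resolve to their underlying type, and pass `endpointTokenConfig` through to `checkBalance`. A hedged sketch of how per-model pricing from that config might override a default token-cost multiplier (the entry shape and `defaultRate` value are assumptions):

```js
// Sketch only: prefer per-endpoint pricing when present, else a default rate.
// The { prompt, completion } entry shape and defaultRate are assumptions.
const defaultRate = 6;

function getMultiplier({ model, tokenType, endpointTokenConfig }) {
  if (endpointTokenConfig) {
    return endpointTokenConfig[model]?.[tokenType] ?? defaultRate;
  }
  return defaultRate;
}
```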

api/app/clients/OpenAIClient.js

@@ -131,8 +131,13 @@ class OpenAIClient extends BaseClient {
     const { isChatGptModel } = this;
     this.isUnofficialChatGptModel =
       model.startsWith('text-chat') || model.startsWith('text-davinci-002-render');
+
     this.maxContextTokens =
-      getModelMaxTokens(model, this.options.endpointType ?? this.options.endpoint) ?? 4095; // 1 less than maximum
+      getModelMaxTokens(
+        model,
+        this.options.endpointType ?? this.options.endpoint,
+        this.options.endpointTokenConfig,
+      ) ?? 4095; // 1 less than maximum
 
     if (this.shouldSummarize) {
       this.maxContextTokens = Math.floor(this.maxContextTokens / 2);
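`getModelMaxTokens` now takes the token config as a third argument, letting dynamically fetched context sizes take precedence over the static per-endpoint map, with `?? 4095` as the last-resort fallback. A minimal sketch of that lookup order, assuming dynamic entries carry a `context` field and that `maxTokensMap` is the static fallback table (the values shown are illustrative):

```js
// Sketch only: resolve a model's max context tokens, preferring the
// dynamic endpointTokenConfig over a static per-endpoint map.
const maxTokensMap = {
  openAI: { 'gpt-3.5-turbo': 4095, 'gpt-4': 8187 }, // illustrative values
};

function getModelMaxTokens(model, endpoint, endpointTokenConfig) {
  const tokensMap = endpointTokenConfig ?? maxTokensMap[endpoint];
  if (!tokensMap) {
    return undefined;
  }
  // Assumed: dynamic entries are objects with a `context` field,
  // while static entries are plain numbers.
  const entry = tokensMap[model];
  if (entry == null) {
    return undefined;
  }
  return typeof entry === 'number' ? entry : entry.context;
}
```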
@@ -780,7 +785,12 @@ ${convo}
     // TODO: remove the gpt fallback and make it specific to endpoint
     const { OPENAI_SUMMARY_MODEL = 'gpt-3.5-turbo' } = process.env ?? {};
     const model = this.options.summaryModel ?? OPENAI_SUMMARY_MODEL;
-    const maxContextTokens = getModelMaxTokens(model) ?? 4095;
+    const maxContextTokens =
+      getModelMaxTokens(
+        model,
+        this.options.endpointType ?? this.options.endpoint,
+        this.options.endpointTokenConfig,
+      ) ?? 4095; // 1 less than maximum
 
     // 3 tokens for the assistant label, and 98 for the summarizer prompt (101)
     let promptBuffer = 101;
@@ -886,6 +896,7 @@ ${convo}
         model: this.modelOptions.model,
         context: 'message',
         conversationId: this.conversationId,
+        endpointTokenConfig: this.options.endpointTokenConfig,
       },
       { promptTokens, completionTokens },
     );