fix: Match OpenAI Token Counting Strategy 🪙 (#945)

* wip token fix * fix: complete token count refactor to match OpenAI example * chore: add back sendPayload method (accidentally deleted) * chore: revise JSDoc for getTokenCountForMessage
2026-02-07 02:01:50 +01:00 · 2023-09-14 19:40:21 -04:00 · 2023-09-14 19:40:21 -04:00 · 9491b753c3
commit 9491b753c3
parent b3afd562b9
11 changed files with 115 additions and 76 deletions
--- a/api/app/clients/BaseClient.js
+++ b/api/app/clients/BaseClient.js
@ -272,7 +272,9 @@ class BaseClient {
   * @returns {Object} An object with three properties: `context`, `remainingContextTokens`, and `messagesToRefine`. `context` is an array of messages that fit within the token limit. `remainingContextTokens` is the number of tokens remaining within the limit after adding the messages to the context. `messagesToRefine` is an array of messages that were not added to the context because they would have exceeded the token limit.
   */
  async getMessagesWithinTokenLimit(messages) {
-    let currentTokenCount = 0;
+    // Every reply is primed with <|start|>assistant<|message|>, so we
+    // start with 3 tokens for the label after all messages have been counted.
+    let currentTokenCount = 3;
    let context = [];
    let messagesToRefine = [];
    let refineIndex = -1;
@ -562,44 +564,29 @@ class BaseClient {
   * Algorithm adapted from "6. Counting tokens for chat API calls" of
   * https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
   *
-   * An additional 2 tokens need to be added for metadata after all messages have been counted.
+   * An additional 3 tokens need to be added for assistant label priming after all messages have been counted.
   *
-   * @param {*} message
+   * @param {Object} message
   */
  getTokenCountForMessage(message) {
-    let tokensPerMessage;
-    let nameAdjustment;
-    if (this.modelOptions.model.startsWith('gpt-4')) {
-      tokensPerMessage = 3;
-      nameAdjustment = 1;
-    } else {
+    // Note: gpt-3.5-turbo and gpt-4 may update over time. Use default for these as well as for unknown models
+    let tokensPerMessage = 3;
+    let tokensPerName = 1;
+
+    if (this.modelOptions.model === 'gpt-3.5-turbo-0301') {
      tokensPerMessage = 4;
-      nameAdjustment = -1;
+      tokensPerName = -1;
    }

-    if (this.options.debug) {
-      console.debug('getTokenCountForMessage', message);
-    }
-
-    // Map each property of the message to the number of tokens it contains
-    const propertyTokenCounts = Object.entries(message).map(([key, value]) => {
-      if (key === 'tokenCount' || typeof value !== 'string') {
-        return 0;
+    let numTokens = tokensPerMessage;
+    for (let [key, value] of Object.entries(message)) {
+      numTokens += this.getTokenCount(value);
+      if (key === 'name') {
+        numTokens += tokensPerName;
      }
-      // Count the number of tokens in the property value
-      const numTokens = this.getTokenCount(value);
-
-      // Adjust by `nameAdjustment` tokens if the property key is 'name'
-      const adjustment = key === 'name' ? nameAdjustment : 0;
-      return numTokens + adjustment;
-    });
-
-    if (this.options.debug) {
-      console.debug('propertyTokenCounts', propertyTokenCounts);
    }

-    // Sum the number of tokens in all properties and add `tokensPerMessage` for metadata
-    return propertyTokenCounts.reduce((a, b) => a + b, tokensPerMessage);
+    return numTokens;
  }

  async sendPayload(payload, opts = {}) {