fix: Match OpenAI Token Counting Strategy 🪙 (#945)

* wip token fix

* fix: complete token count refactor to match OpenAI example

* chore: add back sendPayload method (accidentally deleted)

* chore: revise JSDoc for getTokenCountForMessage
This commit is contained in:
Danny Avila 2023-09-14 19:40:21 -04:00 committed by GitHub
parent b3afd562b9
commit 9491b753c3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 115 additions and 76 deletions

View file

@ -272,7 +272,9 @@ class BaseClient {
* @returns {Object} An object with three properties: `context`, `remainingContextTokens`, and `messagesToRefine`. `context` is an array of messages that fit within the token limit. `remainingContextTokens` is the number of tokens remaining within the limit after adding the messages to the context. `messagesToRefine` is an array of messages that were not added to the context because they would have exceeded the token limit.
*/
async getMessagesWithinTokenLimit(messages) {
let currentTokenCount = 0;
// Every reply is primed with <|start|>assistant<|message|>, so we seed the
// count with the 3 tokens for that label up front rather than adding them after all messages have been counted.
let currentTokenCount = 3;
let context = [];
let messagesToRefine = [];
let refineIndex = -1;
@ -562,44 +564,29 @@ class BaseClient {
* Algorithm adapted from "6. Counting tokens for chat API calls" of
* https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
*
* An additional 2 tokens need to be added for metadata after all messages have been counted.
* An additional 3 tokens need to be added for assistant label priming after all messages have been counted.
*
* @param {*} message
* @param {Object} message - The chat message whose properties are counted toward the token total.
*/
getTokenCountForMessage(message) {
let tokensPerMessage;
let nameAdjustment;
if (this.modelOptions.model.startsWith('gpt-4')) {
tokensPerMessage = 3;
nameAdjustment = 1;
} else {
// Note: gpt-3.5-turbo and gpt-4 may update over time. Use default for these as well as for unknown models
let tokensPerMessage = 3;
let tokensPerName = 1;
if (this.modelOptions.model === 'gpt-3.5-turbo-0301') {
tokensPerMessage = 4;
nameAdjustment = -1;
tokensPerName = -1;
}
if (this.options.debug) {
console.debug('getTokenCountForMessage', message);
}
// Map each property of the message to the number of tokens it contains
const propertyTokenCounts = Object.entries(message).map(([key, value]) => {
if (key === 'tokenCount' || typeof value !== 'string') {
return 0;
let numTokens = tokensPerMessage;
for (let [key, value] of Object.entries(message)) {
numTokens += this.getTokenCount(value);
if (key === 'name') {
numTokens += tokensPerName;
}
// Count the number of tokens in the property value
const numTokens = this.getTokenCount(value);
// Adjust by `nameAdjustment` tokens if the property key is 'name'
const adjustment = key === 'name' ? nameAdjustment : 0;
return numTokens + adjustment;
});
if (this.options.debug) {
console.debug('propertyTokenCounts', propertyTokenCounts);
}
// Sum the number of tokens in all properties and add `tokensPerMessage` for metadata
return propertyTokenCounts.reduce((a, b) => a + b, tokensPerMessage);
return numTokens;
}
async sendPayload(payload, opts = {}) {