diff --git a/api/app/clients/BaseClient.js b/api/app/clients/BaseClient.js index 46b2c79221..23a56d67e6 100644 --- a/api/app/clients/BaseClient.js +++ b/api/app/clients/BaseClient.js @@ -40,6 +40,18 @@ class BaseClient { throw new Error('Subclasses attempted to call summarizeMessages without implementing it'); } + /** + * Placeholder for subclasses that can count the tokens of a response message. + * This base version only logs when `options.debug` is set and resolves to `undefined`. + * @param {object} response - The response message to count tokens for. + * @returns {Promise<void>} + */ + async getTokenCountForResponse(response) { + if (this.options.debug) { + console.debug('`getTokenCountForResponse` not implemented.', response); + } + } + async recordTokenUsage({ promptTokens, completionTokens }) { if (this.options.debug) { console.debug('`recordTokenUsage` not implemented.', { promptTokens, completionTokens }); @@ -455,11 +467,17 @@ class BaseClient { promptTokens, }; - if (tokenCountMap && this.getTokenCount) { - responseMessage.tokenCount = this.getTokenCount(completion); - responseMessage.completionTokens = responseMessage.tokenCount; + if ( + tokenCountMap && + this.recordTokenUsage && + this.getTokenCountForResponse && + this.getTokenCount + ) { + // Awaited because the base implementation is async; a synchronous override is unaffected. + responseMessage.tokenCount = await this.getTokenCountForResponse(responseMessage); + const completionTokens = this.getTokenCount(completion); + await this.recordTokenUsage({ promptTokens, completionTokens }); } - await this.recordTokenUsage(responseMessage); await this.saveMessageToDatabase(responseMessage, saveOptions, user); delete responseMessage.tokenCount; return responseMessage; diff --git a/api/app/clients/OpenAIClient.js b/api/app/clients/OpenAIClient.js index b8673d9d88..1450a08d7e 100644 --- a/api/app/clients/OpenAIClient.js +++ b/api/app/clients/OpenAIClient.js @@ -653,6 +653,18 @@ ${convo} { promptTokens, completionTokens }, ); } + + /** + * Counts the tokens of a response by wrapping its text as an assistant message. + * @param {object} response - Response message; only `response.text` is read here. + * @returns {*} The value returned by `getTokenCountForMessage` for that message. + */ + getTokenCountForResponse(response) { + return this.getTokenCountForMessage({ + role: 'assistant', + content: response.text, + }); + } } module.exports = OpenAIClient; diff --git a/api/app/clients/PluginsClient.js b/api/app/clients/PluginsClient.js index 919f1b8131..853d0e12df 100644 --- a/api/app/clients/PluginsClient.js +++ b/api/app/clients/PluginsClient.js 
@@ -230,13 +230,15 @@ If your reverse proxy is compatible to OpenAI specs in every other way, it may s console.debug('[handleResponseMessage] Output:', { output, errorMessage, ...result }); const { error } = responseMessage; if (!error) { - responseMessage.tokenCount = this.getTokenCount(responseMessage.text); - responseMessage.completionTokens = responseMessage.tokenCount; + responseMessage.tokenCount = this.getTokenCountForResponse(responseMessage); + responseMessage.completionTokens = this.getTokenCount(responseMessage.text); } + // Record usage only when completion is NOT skipped; when it is skipped, usage was already recorded in the agent phase. if (!this.agentOptions.skipCompletion && !error) { await this.recordTokenUsage(responseMessage); } + await this.saveMessageToDatabase(responseMessage, saveOptions, user); delete responseMessage.tokenCount; return { ...responseMessage, ...result };