feat: Accurate Token Usage Tracking & Optional Balance (#1018)

* refactor(Chains/llms): allow passing callbacks

* refactor(BaseClient): accurately count completion tokens as generation only

* refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM

* wip: summary prompt tokens

* refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end
wip: draft out relevant providers and variables for token tracing

* refactor(createLLM): make streaming prop false by default

* chore: remove use of getTokenCountForResponse

* refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace

* chore: remove passing of streaming prop, also console log useful vars for tracing

* feat: formatFromLangChain helper function to count tokens for ChatModelStart

* refactor(initializeLLM): add role for LLM tracing

* chore(formatFromLangChain): update JSDoc

* feat(formatMessages): formats langChain messages into OpenAI payload format

* chore: install openai-chat-tokens

* refactor(formatMessage): optimize conditional langChain logic
fix(formatFromLangChain): fix destructuring

* feat: accurate prompt tokens for ChatModelStart before generation

* refactor(handleChatModelStart): move to callbacks dir, use factory function

* refactor(initializeLLM): rename 'role' to 'context'

* feat(Balance/Transaction): new schema/models for tracking token spend
refactor(Key): factor out model export to separate file

* refactor(initializeClient): add req,res objects to client options

* feat: add-balance script to add to an existing user's token balance
refactor(Transaction): use multiplier map/function, return balance update

* refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match

* refactor(Tx): add fair fallback value multiplier in case the config result is undefined

* refactor(Balance): rename 'tokens' to 'tokenCredits'

* feat: balance check, add tx.js for new tx-related methods and tests

* chore(summaryPrompts): update prompt token count

* refactor(callbacks): pass req, res
wip: check balance

* refactor(Tx): make convoId a String type, fix(calculateTokenValue)

* refactor(BaseClient): add conversationId as client prop when assigned

* feat(RunManager): track LLM runs with manager, track token spend from LLM,
refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls

* feat(spendTokens): helper to spend prompt/completion tokens

* feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds
refactor(Balance): static check method to return object instead of boolean now
wip(OpenAIClient): implement use of checkBalance

* refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large
refactor(OpenAIClient): add checkBalance
refactor(createStartHandler): add checkBalance

* chore: remove prompt and completion token logging from route handler

* chore(spendTokens): add JSDoc

* feat(logTokenCost): record transactions for basic api calls

* chore(ask/edit): invoke getResponseSender only once per API call

* refactor(ask/edit): pass promptTokens to getIds and include in abort data

* refactor(getIds -> getReqData): rename function

* refactor(Tx): increase value if incomplete message

* feat: record tokenUsage when message is aborted

* refactor: subtract tokens when payload includes function_call

* refactor: add namespace for token_balance

* fix(spendTokens): only execute if corresponding token type amounts are defined

* refactor(checkBalance): throws Error if not enough token credits

* refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run'

* fix(abortMiddleware): circular dependency, and default to empty string for completionTokens

* fix: properly cancel title requests when there aren't enough tokens to generate

* feat(predictNewSummary): custom chain for summaries to allow signal passing
refactor(summaryBuffer): use new custom chain

* feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError

* refactor(createStartHandler): if summary, add error details to runs

* fix(OpenAIClient): support aborting from summarization & showing error to user
refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer

* refactor(logTokenCost -> recordTokenUsage): rename

* refactor(checkBalance): include promptTokens in errorMessage

* refactor(checkBalance/spendTokens): move to models dir

* fix(createLanguageChain): correctly pass config

* refactor(initializeLLM/title): add tokenBuffer of 150 for balance check

* refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called

* refactor(createStartHandler): add error to run if context is plugins as well

* refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run

* refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic

* chore: use absolute equality for addTitle condition

* refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional

* style: icon changes to match official

* fix(BaseClient): getTokenCountForResponse -> getTokenCount

* fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc

* refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled

* fix(e2e/cleanUp): cleanup new collections, import all model methods from index

* fix(config/add-balance): add uncaughtException listener

* fix: circular dependency

* refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance

* fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped

* fix(createStartHandler): correct condition for generations

* chore: bump postcss due to moderate severity vulnerability

* chore: bump zod due to low severity vulnerability

* chore: bump openai & data-provider version

* feat(types): OpenAI Message types

* chore: update bun lockfile

* refactor(CodeBlock): add error block formatting

* refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON

* chore(logViolation): delete user_id after error is logged

* refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex

* fix(DALL-E): use latest openai SDK

* chore: reorganize imports, fix type issue

* feat(server): add balance route

* fix(api/models): add auth

* feat(data-provider): /api/balance query

* feat: show balance if checking is enabled, refetch on final message or error

* chore: update docs, .env.example with token_usage info, add balance script command

* fix(Balance): fallback to empty obj for balance query

* style: slight adjustment of balance element

* docs(token_usage): add PR notes
This commit is contained in:
Danny Avila 2023-10-05 18:34:10 -04:00 committed by GitHub
parent be71a1947b
commit 365c39c405
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
81 changed files with 1606 additions and 293 deletions

View file

@ -34,12 +34,12 @@ const Icon: React.FC<IconProps> = (props) => {
} else {
const endpointIcons = {
azureOpenAI: {
icon: <AzureMinimalIcon size={size * 0.55} />,
icon: <AzureMinimalIcon size={size * 0.5555555555555556} />,
bg: 'linear-gradient(0.375turn, #61bde2, #4389d0)',
name: 'ChatGPT',
},
openAI: {
icon: <GPTIcon size={size * 0.55} />,
icon: <GPTIcon size={size * 0.5555555555555556} />,
bg:
typeof model === 'string' && model.toLowerCase().includes('gpt-4')
? '#AB68FF'
@ -52,7 +52,11 @@ const Icon: React.FC<IconProps> = (props) => {
name: 'Plugins',
},
google: { icon: <img src="/assets/google-palm.svg" alt="Palm Icon" />, name: 'PaLM2' },
anthropic: { icon: <AnthropicIcon size={size * 0.55} />, bg: '#d09a74', name: 'Claude' },
anthropic: {
icon: <AnthropicIcon size={size * 0.5555555555555556} />,
bg: '#d09a74',
name: 'Claude',
},
bingAI: {
icon: jailbreak ? (
<img src="/assets/bingai-jb.png" alt="Bing Icon" />
@ -62,7 +66,7 @@ const Icon: React.FC<IconProps> = (props) => {
name: jailbreak ? 'Sydney' : 'BingAI',
},
chatGPTBrowser: {
icon: <GPTIcon size={size * 0.55} />,
icon: <GPTIcon size={size * 0.5555555555555556} />,
bg:
typeof model === 'string' && model.toLowerCase().includes('gpt-4')
? '#AB68FF'

View file

@ -1,16 +1,23 @@
import React, { useRef, useState, RefObject } from 'react';
import copy from 'copy-to-clipboard';
import { Clipboard, CheckMark } from '~/components';
import { InfoIcon } from 'lucide-react';
import { cn } from '~/utils/';
import React, { useRef, useState, RefObject } from 'react';
import Clipboard from '~/components/svg/Clipboard';
import CheckMark from '~/components/svg/CheckMark';
import cn from '~/utils/cn';
interface CodeBarProps {
type CodeBarProps = {
lang: string;
codeRef: RefObject<HTMLElement>;
plugin?: boolean;
}
error?: boolean;
};
const CodeBar: React.FC<CodeBarProps> = React.memo(({ lang, codeRef, plugin = null }) => {
type CodeBlockProps = Pick<CodeBarProps, 'lang' | 'plugin' | 'error'> & {
codeChildren: React.ReactNode;
classProp?: string;
};
const CodeBar: React.FC<CodeBarProps> = React.memo(({ lang, codeRef, error, plugin = null }) => {
const [isCopied, setIsCopied] = useState(false);
return (
<div className="relative flex items-center rounded-tl-md rounded-tr-md bg-gray-800 px-4 py-2 font-sans text-xs text-gray-200">
@ -19,7 +26,7 @@ const CodeBar: React.FC<CodeBarProps> = React.memo(({ lang, codeRef, plugin = nu
<InfoIcon className="ml-auto flex h-4 w-4 gap-2 text-white/50" />
) : (
<button
className="ml-auto flex gap-2"
className={cn('ml-auto flex gap-2', error ? 'h-4 w-4 items-start text-white/50' : '')}
onClick={async () => {
const codeString = codeRef.current?.textContent;
if (codeString) {
@ -35,12 +42,12 @@ const CodeBar: React.FC<CodeBarProps> = React.memo(({ lang, codeRef, plugin = nu
{isCopied ? (
<>
<CheckMark />
Copied!
{error ? '' : 'Copied!'}
</>
) : (
<>
<Clipboard />
Copy code
{error ? '' : 'Copy code'}
</>
)}
</button>
@ -49,30 +56,24 @@ const CodeBar: React.FC<CodeBarProps> = React.memo(({ lang, codeRef, plugin = nu
);
});
interface CodeBlockProps {
lang: string;
codeChildren: React.ReactNode;
classProp?: string;
plugin?: boolean;
}
const CodeBlock: React.FC<CodeBlockProps> = ({
lang,
codeChildren,
classProp = '',
plugin = null,
error,
}) => {
const codeRef = useRef<HTMLElement>(null);
const language = plugin ? 'json' : lang;
const language = plugin || error ? 'json' : lang;
return (
<div className="w-full rounded-md bg-black text-xs text-white/80">
<CodeBar lang={lang} codeRef={codeRef} plugin={!!plugin} />
<CodeBar lang={lang} codeRef={codeRef} plugin={!!plugin} error={error} />
<div className={cn(classProp, 'overflow-y-auto p-4')}>
<code
ref={codeRef}
className={cn(
plugin ? '!whitespace-pre-wrap' : `hljs language-${language} !whitespace-pre`,
plugin || error ? '!whitespace-pre-wrap' : `hljs language-${language} !whitespace-pre`,
)}
>
{codeChildren}

View file

@ -1,7 +1,13 @@
import React from 'react';
import type { TOpenAIMessage } from 'librechat-data-provider';
import { formatJSON, extractJson } from '~/utils/json';
import CodeBlock from './CodeBlock';
const isJson = (str: string) => {
try {
JSON.parse(str);
} catch (e) {
console.error(e);
return false;
}
return true;
@ -16,6 +22,17 @@ type TMessageLimit = {
windowInMinutes: number;
};
type TTokenBalance = {
type: 'token_balance';
balance: number;
tokenCost: number;
promptTokens: number;
prev_count: number;
violation_count: number;
date: Date;
generations?: TOpenAIMessage[];
};
const errorMessages = {
ban: 'Your account has been temporarily banned due to violations of our service.',
invalid_api_key:
@ -34,12 +51,33 @@ const errorMessages = {
windowInMinutes > 1 ? `${windowInMinutes} minutes` : 'minute'
}.`;
},
token_balance: (json: TTokenBalance) => {
const { balance, tokenCost, promptTokens, generations } = json;
const message = `Insufficient Funds! Balance: ${balance}. Prompt tokens: ${promptTokens}. Cost: ${tokenCost}.`;
return (
<>
{message}
{generations && (
<>
<br />
<br />
</>
)}
{generations && (
<CodeBlock
lang="Generations"
error={true}
codeChildren={formatJSON(JSON.stringify(generations))}
/>
)}
</>
);
},
};
const getMessageError = (text: string) => {
const errorMessage = text.length > 512 ? text.slice(0, 512) + '...' : text;
const match = text.match(/\{[^{}]*\}/);
const jsonString = match ? match[0] : '';
const Error = ({ text }: { text: string }) => {
const jsonString = extractJson(text);
const errorMessage = text.length > 512 && !jsonString ? text.slice(0, 512) + '...' : text;
const defaultResponse = `Something went wrong. Here's the specific error message we encountered: ${errorMessage}`;
if (!isJson(jsonString)) {
@ -59,4 +97,4 @@ const getMessageError = (text: string) => {
}
};
export default getMessageError;
export default Error;

View file

@ -2,11 +2,12 @@ import { Fragment } from 'react';
import type { TResPlugin } from 'librechat-data-provider';
import type { TMessageContent, TText, TDisplayProps } from '~/common';
import { useAuthContext } from '~/hooks';
import { cn, getMessageError } from '~/utils';
import { cn } from '~/utils';
import EditMessage from './EditMessage';
import Container from './Container';
import Markdown from './Markdown';
import Plugin from './Plugin';
import Error from './Error';
const ErrorMessage = ({ text }: TText) => {
const { logout } = useAuthContext();
@ -18,7 +19,7 @@ const ErrorMessage = ({ text }: TText) => {
return (
<Container>
<div className="rounded-md border border-red-500 bg-red-500/10 px-3 py-2 text-sm text-gray-600 dark:text-gray-100">
{getMessageError(text)}
<Error text={text} />
</div>
</Container>
);

View file

@ -1,11 +1,11 @@
import { useRecoilValue } from 'recoil';
import { Disclosure } from '@headlessui/react';
import { useCallback, memo, ReactNode } from 'react';
import type { TResPlugin, TInput } from 'librechat-data-provider';
import { ChevronDownIcon, LucideProps } from 'lucide-react';
import { Disclosure } from '@headlessui/react';
import { useRecoilValue } from 'recoil';
import { cn, formatJSON } from '~/utils';
import { Spinner } from '~/components';
import CodeBlock from './CodeBlock';
import { cn } from '~/utils/';
import store from '~/store';
type PluginsMap = {
@ -16,14 +16,6 @@ type PluginIconProps = LucideProps & {
className?: string;
};
function formatJSON(json: string) {
try {
return JSON.stringify(JSON.parse(json), null, 2);
} catch (e) {
return json;
}
}
function formatInputs(inputs: TInput[]) {
let output = '';

View file

@ -94,7 +94,7 @@ export default function Message({
...conversation,
...message,
model: message?.model ?? conversation?.model,
size: 38,
size: 36,
});
if (message?.bg && searchResult) {

View file

@ -1,27 +1,31 @@
import { Download } from 'lucide-react';
import { useRecoilValue } from 'recoil';
import { Fragment, useState } from 'react';
import { useGetUserBalance, useGetStartupConfig } from 'librechat-data-provider';
import type { TConversation } from 'librechat-data-provider';
import { Menu, Transition } from '@headlessui/react';
import { ExportModel } from './ExportConversation';
import ClearConvos from './ClearConvos';
import Settings from './Settings';
import NavLink from './NavLink';
import Logout from './Logout';
import { ExportModel } from './ExportConversation';
import { LinkIcon, DotsIcon, GearIcon } from '~/components';
import { useLocalize } from '~/hooks';
import { useAuthContext } from '~/hooks/AuthContext';
import { useLocalize } from '~/hooks';
import { cn } from '~/utils/';
import store from '~/store';
export default function NavLinks() {
const balanceQuery = useGetUserBalance();
const { data: startupConfig } = useGetStartupConfig();
const [showExports, setShowExports] = useState(false);
const [showClearConvos, setShowClearConvos] = useState(false);
const [showSettings, setShowSettings] = useState(false);
const { user } = useAuthContext();
const localize = useLocalize();
const conversation = useRecoilValue(store.conversation) || {};
const conversation = useRecoilValue(store.conversation) ?? ({} as TConversation);
const exportable =
conversation?.conversationId &&
@ -39,6 +43,11 @@ export default function NavLinks() {
<Menu as="div" className="group relative">
{({ open }) => (
<>
{startupConfig?.checkBalance && balanceQuery.data && (
<div className="m-1 ml-3 whitespace-nowrap text-left text-sm text-gray-100">
{`Balance: ${balanceQuery.data}`}
</div>
)}
<Menu.Button
className={cn(
'group-ui-open:bg-gray-800 flex w-full items-center gap-2.5 rounded-md px-3 py-3 text-sm transition-colors duration-200 hover:bg-gray-800',

View file

@ -1,7 +1,14 @@
import { useEffect } from 'react';
import { useResetRecoilState, useSetRecoilState } from 'recoil';
/* @ts-ignore */
import { SSE, createPayload, tMessageSchema, tConversationSchema } from 'librechat-data-provider';
import {
/* @ts-ignore */
SSE,
createPayload,
useGetUserBalance,
tMessageSchema,
tConversationSchema,
useGetStartupConfig,
} from 'librechat-data-provider';
import type { TResPlugin, TMessage, TConversation, TSubmission } from 'librechat-data-provider';
import useConversations from './useConversations';
import { useAuthContext } from './AuthContext';
@ -24,7 +31,9 @@ export default function useServerStream(submission: TSubmission | null) {
const resetLatestMessage = useResetRecoilState(store.latestMessage);
const { token } = useAuthContext();
const { data: startupConfig } = useGetStartupConfig();
const { refreshConversations } = useConversations();
const balanceQuery = useGetUserBalance();
const messageHandler = (data: string, submission: TSubmission) => {
const {
@ -228,6 +237,7 @@ export default function useServerStream(submission: TSubmission | null) {
if (data.final) {
const { plugins } = data;
finalHandler(data, { ...submission, plugins, message });
startupConfig?.checkBalance && balanceQuery.refetch();
console.log('final', data);
}
if (data.created) {
@ -253,6 +263,7 @@ export default function useServerStream(submission: TSubmission | null) {
events.onerror = function (e: MessageEvent) {
console.log('error in opening conn.');
startupConfig?.checkBalance && balanceQuery.refetch();
events.close();
const data = JSON.parse(e.data);

6
client/src/utils/cn.ts Normal file
View file

@ -0,0 +1,6 @@
import { twMerge } from 'tailwind-merge';
import { clsx } from 'clsx';
export default function cn(...inputs: string[]) {
return twMerge(clsx(inputs));
}

View file

@ -1,20 +1,14 @@
import { clsx } from 'clsx';
import { twMerge } from 'tailwind-merge';
export * from './json';
export * from './languages';
export { default as cn } from './cn';
export { default as buildTree } from './buildTree';
export { default as getLoginError } from './getLoginError';
export { default as cleanupPreset } from './cleanupPreset';
export { default as validateIframe } from './validateIframe';
export { default as getMessageError } from './getMessageError';
export { default as buildDefaultConvo } from './buildDefaultConvo';
export { default as getDefaultEndpoint } from './getDefaultEndpoint';
export { default as getLocalStorageItems } from './getLocalStorageItems';
export function cn(...inputs: string[]) {
return twMerge(clsx(inputs));
}
export const languages = [
'java',
'c',

28
client/src/utils/json.ts Normal file
View file

@ -0,0 +1,28 @@
export function formatJSON(json: string) {
try {
return JSON.stringify(JSON.parse(json), null, 2);
} catch (e) {
return json;
}
}
export function extractJson(text: string) {
let openBraces = 0;
let startIndex = -1;
for (let i = 0; i < text.length; i++) {
if (text[i] === '{') {
if (openBraces === 0) {
startIndex = i;
}
openBraces++;
} else if (text[i] === '}') {
openBraces--;
if (openBraces === 0 && startIndex !== -1) {
return text.slice(startIndex, i + 1);
}
}
}
return '';
}