import { processTextWithTokenLimit, TokenCountFn } from './text'; import Tokenizer, { countTokens } from './tokenizer'; jest.mock('@librechat/data-schemas', () => ({ logger: { debug: jest.fn(), warn: jest.fn(), error: jest.fn(), }, })); /** * OLD IMPLEMENTATION (Binary Search) - kept for comparison testing * This is the original algorithm that caused CPU spikes */ async function processTextWithTokenLimitOLD({ text, tokenLimit, tokenCountFn, }: { text: string; tokenLimit: number; tokenCountFn: TokenCountFn; }): Promise<{ text: string; tokenCount: number; wasTruncated: boolean }> { const originalTokenCount = await tokenCountFn(text); if (originalTokenCount <= tokenLimit) { return { text, tokenCount: originalTokenCount, wasTruncated: false, }; } let low = 0; let high = text.length; let bestText = ''; while (low <= high) { const mid = Math.floor((low + high) / 2); const truncatedText = text.substring(0, mid); const tokenCount = await tokenCountFn(truncatedText); if (tokenCount <= tokenLimit) { bestText = truncatedText; low = mid + 1; } else { high = mid - 1; } } const finalTokenCount = await tokenCountFn(bestText); return { text: bestText, tokenCount: finalTokenCount, wasTruncated: true, }; } /** * Creates a wrapper around Tokenizer.getTokenCount that tracks call count */ const createRealTokenCounter = () => { let callCount = 0; const tokenCountFn = (text: string): number => { callCount++; return Tokenizer.getTokenCount(text, 'cl100k_base'); }; return { tokenCountFn, getCallCount: () => callCount, resetCallCount: () => { callCount = 0; }, }; }; /** * Creates a wrapper around the async countTokens function that tracks call count */ const createCountTokensCounter = () => { let callCount = 0; const tokenCountFn = async (text: string): Promise => { callCount++; return countTokens(text); }; return { tokenCountFn, getCallCount: () => callCount, resetCallCount: () => { callCount = 0; }, }; }; describe('processTextWithTokenLimit', () => { /** * Creates a mock token count function that simulates realistic token counting. * Roughly 4 characters per token (common for English text). * Tracks call count to verify efficiency. */ const createMockTokenCounter = () => { let callCount = 0; const tokenCountFn = (text: string): number => { callCount++; return Math.ceil(text.length / 4); }; return { tokenCountFn, getCallCount: () => callCount, resetCallCount: () => { callCount = 0; }, }; }; /** Creates a string of specified character length */ const createTextOfLength = (charLength: number): string => { return 'a'.repeat(charLength); }; /** Creates realistic text content with varied token density */ const createRealisticText = (approximateTokens: number): string => { const words = [ 'the', 'quick', 'brown', 'fox', 'jumps', 'over', 'lazy', 'dog', 'lorem', 'ipsum', 'dolor', 'sit', 'amet', 'consectetur', 'adipiscing', 'elit', 'sed', 'do', 'eiusmod', 'tempor', 'incididunt', 'ut', 'labore', 'et', 'dolore', 'magna', 'aliqua', 'enim', 'ad', 'minim', 'veniam', 'authentication', 'implementation', 'configuration', 'documentation', ]; const result: string[] = []; for (let i = 0; i < approximateTokens; i++) { result.push(words[i % words.length]); } return result.join(' '); }; describe('tokenCountFn flexibility (sync and async)', () => { it('should work with synchronous tokenCountFn', async () => { const syncTokenCountFn = (text: string): number => Math.ceil(text.length / 4); const text = 'Hello, world! This is a test message.'; const tokenLimit = 5; const result = await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn: syncTokenCountFn, }); expect(result.wasTruncated).toBe(true); expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit); }); it('should work with asynchronous tokenCountFn', async () => { const asyncTokenCountFn = async (text: string): Promise => { await new Promise((resolve) => setTimeout(resolve, 1)); return Math.ceil(text.length / 4); }; const text = 'Hello, world! This is a test message.'; const tokenLimit = 5; const result = await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn: asyncTokenCountFn, }); expect(result.wasTruncated).toBe(true); expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit); }); it('should produce equivalent results with sync and async tokenCountFn', async () => { const syncTokenCountFn = (text: string): number => Math.ceil(text.length / 4); const asyncTokenCountFn = async (text: string): Promise => Math.ceil(text.length / 4); const text = 'a'.repeat(8000); const tokenLimit = 1000; const syncResult = await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn: syncTokenCountFn, }); const asyncResult = await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn: asyncTokenCountFn, }); expect(syncResult.tokenCount).toBe(asyncResult.tokenCount); expect(syncResult.wasTruncated).toBe(asyncResult.wasTruncated); expect(syncResult.text.length).toBe(asyncResult.text.length); }); }); describe('when text is under the token limit', () => { it('should return original text unchanged', async () => { const { tokenCountFn } = createMockTokenCounter(); const text = 'Hello, world!'; const tokenLimit = 100; const result = await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn, }); expect(result.text).toBe(text); expect(result.wasTruncated).toBe(false); }); it('should return correct token count', async () => { const { tokenCountFn } = createMockTokenCounter(); const text = 'Hello, world!'; const tokenLimit = 100; const result = await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn, }); expect(result.tokenCount).toBe(Math.ceil(text.length / 4)); }); it('should only call tokenCountFn once when under limit', async () => { const { tokenCountFn, getCallCount } = createMockTokenCounter(); const text = 'Hello, world!'; const tokenLimit = 100; await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn, }); expect(getCallCount()).toBe(1); }); }); describe('when text is exactly at the token limit', () => { it('should return original text unchanged', async () => { const { tokenCountFn } = createMockTokenCounter(); const text = createTextOfLength(400); const tokenLimit = 100; const result = await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn, }); expect(result.text).toBe(text); expect(result.wasTruncated).toBe(false); expect(result.tokenCount).toBe(tokenLimit); }); }); describe('when text exceeds the token limit', () => { it('should truncate text to fit within limit', async () => { const { tokenCountFn } = createMockTokenCounter(); const text = createTextOfLength(8000); const tokenLimit = 1000; const result = await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn, }); expect(result.wasTruncated).toBe(true); expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit); expect(result.text.length).toBeLessThan(text.length); }); it('should truncate text to be close to but not exceed the limit', async () => { const { tokenCountFn } = createMockTokenCounter(); const text = createTextOfLength(8000); const tokenLimit = 1000; const result = await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn, }); expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit); expect(result.tokenCount).toBeGreaterThan(tokenLimit * 0.9); }); }); describe('efficiency - tokenCountFn call count', () => { it('should call tokenCountFn at most 7 times for large text (vs ~17 for binary search)', async () => { const { tokenCountFn, getCallCount } = createMockTokenCounter(); const text = createTextOfLength(400000); const tokenLimit = 50000; await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn, }); expect(getCallCount()).toBeLessThanOrEqual(7); }); it('should typically call tokenCountFn only 2-3 times for standard truncation', async () => { const { tokenCountFn, getCallCount } = createMockTokenCounter(); const text = createTextOfLength(40000); const tokenLimit = 5000; await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn, }); expect(getCallCount()).toBeLessThanOrEqual(3); }); it('should call tokenCountFn only once when text is under limit', async () => { const { tokenCountFn, getCallCount } = createMockTokenCounter(); const text = createTextOfLength(1000); const tokenLimit = 10000; await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn, }); expect(getCallCount()).toBe(1); }); it('should handle very large text (100k+ tokens) efficiently', async () => { const { tokenCountFn, getCallCount } = createMockTokenCounter(); const text = createTextOfLength(500000); const tokenLimit = 100000; await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn, }); expect(getCallCount()).toBeLessThanOrEqual(7); }); }); describe('edge cases', () => { it('should handle empty text', async () => { const { tokenCountFn } = createMockTokenCounter(); const text = ''; const tokenLimit = 100; const result = await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn, }); expect(result.text).toBe(''); expect(result.tokenCount).toBe(0); expect(result.wasTruncated).toBe(false); }); it('should handle token limit of 1', async () => { const { tokenCountFn } = createMockTokenCounter(); const text = createTextOfLength(1000); const tokenLimit = 1; const result = await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn, }); expect(result.wasTruncated).toBe(true); expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit); }); it('should handle text that is just slightly over the limit', async () => { const { tokenCountFn } = createMockTokenCounter(); const text = createTextOfLength(404); const tokenLimit = 100; const result = await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn, }); expect(result.wasTruncated).toBe(true); expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit); }); }); describe('correctness with variable token density', () => { it('should handle text with varying token density', async () => { const variableDensityTokenCounter = (text: string): number => { const shortWords = (text.match(/\s+/g) || []).length; return Math.ceil(text.length / 4) + shortWords; }; const text = 'This is a test with many short words and some longer concatenated words too'; const tokenLimit = 10; const result = await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn: variableDensityTokenCounter, }); expect(result.wasTruncated).toBe(true); expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit); }); }); describe('direct comparison with OLD binary search implementation', () => { it('should produce equivalent results to the old implementation', async () => { const oldCounter = createMockTokenCounter(); const newCounter = createMockTokenCounter(); const text = createTextOfLength(8000); const tokenLimit = 1000; const oldResult = await processTextWithTokenLimitOLD({ text, tokenLimit, tokenCountFn: oldCounter.tokenCountFn, }); const newResult = await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn: newCounter.tokenCountFn, }); expect(newResult.wasTruncated).toBe(oldResult.wasTruncated); expect(newResult.tokenCount).toBeLessThanOrEqual(tokenLimit); expect(oldResult.tokenCount).toBeLessThanOrEqual(tokenLimit); }); it('should use significantly fewer tokenCountFn calls than old implementation (400k chars)', async () => { const oldCounter = createMockTokenCounter(); const newCounter = createMockTokenCounter(); const text = createTextOfLength(400000); const tokenLimit = 50000; await processTextWithTokenLimitOLD({ text, tokenLimit, tokenCountFn: oldCounter.tokenCountFn, }); await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn: newCounter.tokenCountFn, }); const oldCalls = oldCounter.getCallCount(); const newCalls = newCounter.getCallCount(); console.log( `[400k chars] OLD implementation: ${oldCalls} calls, NEW implementation: ${newCalls} calls`, ); console.log(`[400k chars] Reduction: ${((1 - newCalls / oldCalls) * 100).toFixed(1)}%`); expect(newCalls).toBeLessThan(oldCalls); expect(newCalls).toBeLessThanOrEqual(7); }); it('should use significantly fewer tokenCountFn calls than old implementation (500k chars, 100k token limit)', async () => { const oldCounter = createMockTokenCounter(); const newCounter = createMockTokenCounter(); const text = createTextOfLength(500000); const tokenLimit = 100000; await processTextWithTokenLimitOLD({ text, tokenLimit, tokenCountFn: oldCounter.tokenCountFn, }); await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn: newCounter.tokenCountFn, }); const oldCalls = oldCounter.getCallCount(); const newCalls = newCounter.getCallCount(); console.log( `[500k chars] OLD implementation: ${oldCalls} calls, NEW implementation: ${newCalls} calls`, ); console.log(`[500k chars] Reduction: ${((1 - newCalls / oldCalls) * 100).toFixed(1)}%`); expect(newCalls).toBeLessThan(oldCalls); }); it('should achieve at least 70% reduction in tokenCountFn calls', async () => { const oldCounter = createMockTokenCounter(); const newCounter = createMockTokenCounter(); const text = createTextOfLength(500000); const tokenLimit = 100000; await processTextWithTokenLimitOLD({ text, tokenLimit, tokenCountFn: oldCounter.tokenCountFn, }); await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn: newCounter.tokenCountFn, }); const oldCalls = oldCounter.getCallCount(); const newCalls = newCounter.getCallCount(); const reduction = 1 - newCalls / oldCalls; console.log( `Efficiency improvement: ${(reduction * 100).toFixed(1)}% fewer tokenCountFn calls`, ); expect(reduction).toBeGreaterThanOrEqual(0.7); }); it('should simulate the reported scenario (122k tokens, 100k limit)', async () => { const oldCounter = createMockTokenCounter(); const newCounter = createMockTokenCounter(); const text = createTextOfLength(489564); const tokenLimit = 100000; await processTextWithTokenLimitOLD({ text, tokenLimit, tokenCountFn: oldCounter.tokenCountFn, }); await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn: newCounter.tokenCountFn, }); const oldCalls = oldCounter.getCallCount(); const newCalls = newCounter.getCallCount(); console.log(`[User reported scenario: ~122k tokens]`); console.log(`OLD implementation: ${oldCalls} tokenCountFn calls`); console.log(`NEW implementation: ${newCalls} tokenCountFn calls`); console.log(`Reduction: ${((1 - newCalls / oldCalls) * 100).toFixed(1)}%`); expect(newCalls).toBeLessThan(oldCalls); expect(newCalls).toBeLessThanOrEqual(7); }); }); describe('direct comparison with REAL tiktoken tokenizer', () => { beforeEach(() => { Tokenizer.freeAndResetAllEncoders(); }); it('should produce valid truncation with real tokenizer', async () => { const counter = createRealTokenCounter(); const text = createRealisticText(5000); const tokenLimit = 1000; const result = await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn: counter.tokenCountFn, }); expect(result.wasTruncated).toBe(true); expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit); expect(result.text.length).toBeLessThan(text.length); }); it('should use fewer tiktoken calls than old implementation (realistic text)', async () => { const oldCounter = createRealTokenCounter(); const newCounter = createRealTokenCounter(); const text = createRealisticText(15000); const tokenLimit = 5000; await processTextWithTokenLimitOLD({ text, tokenLimit, tokenCountFn: oldCounter.tokenCountFn, }); Tokenizer.freeAndResetAllEncoders(); await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn: newCounter.tokenCountFn, }); const oldCalls = oldCounter.getCallCount(); const newCalls = newCounter.getCallCount(); console.log(`[Real tiktoken ~15k tokens] OLD: ${oldCalls} calls, NEW: ${newCalls} calls`); console.log(`[Real tiktoken] Reduction: ${((1 - newCalls / oldCalls) * 100).toFixed(1)}%`); expect(newCalls).toBeLessThan(oldCalls); }); it('should handle the reported user scenario with real tokenizer (~120k tokens)', async () => { const oldCounter = createRealTokenCounter(); const newCounter = createRealTokenCounter(); const text = createRealisticText(120000); const tokenLimit = 100000; const startOld = performance.now(); await processTextWithTokenLimitOLD({ text, tokenLimit, tokenCountFn: oldCounter.tokenCountFn, }); const timeOld = performance.now() - startOld; Tokenizer.freeAndResetAllEncoders(); const startNew = performance.now(); const result = await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn: newCounter.tokenCountFn, }); const timeNew = performance.now() - startNew; const oldCalls = oldCounter.getCallCount(); const newCalls = newCounter.getCallCount(); console.log(`\n[REAL TIKTOKEN - User reported scenario: ~120k tokens]`); console.log(`OLD implementation: ${oldCalls} tiktoken calls, ${timeOld.toFixed(0)}ms`); console.log(`NEW implementation: ${newCalls} tiktoken calls, ${timeNew.toFixed(0)}ms`); console.log(`Call reduction: ${((1 - newCalls / oldCalls) * 100).toFixed(1)}%`); console.log(`Time reduction: ${((1 - timeNew / timeOld) * 100).toFixed(1)}%`); console.log( `Result: truncated=${result.wasTruncated}, tokens=${result.tokenCount}/${tokenLimit}\n`, ); expect(newCalls).toBeLessThan(oldCalls); expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit); expect(newCalls).toBeLessThanOrEqual(7); }); it('should achieve at least 70% reduction with real tokenizer', async () => { const oldCounter = createRealTokenCounter(); const newCounter = createRealTokenCounter(); const text = createRealisticText(50000); const tokenLimit = 10000; await processTextWithTokenLimitOLD({ text, tokenLimit, tokenCountFn: oldCounter.tokenCountFn, }); Tokenizer.freeAndResetAllEncoders(); await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn: newCounter.tokenCountFn, }); const oldCalls = oldCounter.getCallCount(); const newCalls = newCounter.getCallCount(); const reduction = 1 - newCalls / oldCalls; console.log( `[Real tiktoken 50k tokens] OLD: ${oldCalls}, NEW: ${newCalls}, Reduction: ${(reduction * 100).toFixed(1)}%`, ); expect(reduction).toBeGreaterThanOrEqual(0.7); }); }); describe('using countTokens async function from @librechat/api', () => { beforeEach(() => { Tokenizer.freeAndResetAllEncoders(); }); it('countTokens should return correct token count', async () => { const text = 'Hello, world!'; const count = await countTokens(text); expect(count).toBeGreaterThan(0); expect(typeof count).toBe('number'); }); it('countTokens should handle empty string', async () => { const count = await countTokens(''); expect(count).toBe(0); }); it('should work with processTextWithTokenLimit using countTokens', async () => { const counter = createCountTokensCounter(); const text = createRealisticText(5000); const tokenLimit = 1000; const result = await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn: counter.tokenCountFn, }); expect(result.wasTruncated).toBe(true); expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit); expect(result.text.length).toBeLessThan(text.length); }); it('should use fewer countTokens calls than old implementation', async () => { const oldCounter = createCountTokensCounter(); const newCounter = createCountTokensCounter(); const text = createRealisticText(15000); const tokenLimit = 5000; await processTextWithTokenLimitOLD({ text, tokenLimit, tokenCountFn: oldCounter.tokenCountFn, }); Tokenizer.freeAndResetAllEncoders(); await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn: newCounter.tokenCountFn, }); const oldCalls = oldCounter.getCallCount(); const newCalls = newCounter.getCallCount(); console.log(`[countTokens ~15k tokens] OLD: ${oldCalls} calls, NEW: ${newCalls} calls`); console.log(`[countTokens] Reduction: ${((1 - newCalls / oldCalls) * 100).toFixed(1)}%`); expect(newCalls).toBeLessThan(oldCalls); }); it('should handle user reported scenario with countTokens (~120k tokens)', async () => { const oldCounter = createCountTokensCounter(); const newCounter = createCountTokensCounter(); const text = createRealisticText(120000); const tokenLimit = 100000; const startOld = performance.now(); await processTextWithTokenLimitOLD({ text, tokenLimit, tokenCountFn: oldCounter.tokenCountFn, }); const timeOld = performance.now() - startOld; Tokenizer.freeAndResetAllEncoders(); const startNew = performance.now(); const result = await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn: newCounter.tokenCountFn, }); const timeNew = performance.now() - startNew; const oldCalls = oldCounter.getCallCount(); const newCalls = newCounter.getCallCount(); console.log(`\n[countTokens - User reported scenario: ~120k tokens]`); console.log(`OLD implementation: ${oldCalls} countTokens calls, ${timeOld.toFixed(0)}ms`); console.log(`NEW implementation: ${newCalls} countTokens calls, ${timeNew.toFixed(0)}ms`); console.log(`Call reduction: ${((1 - newCalls / oldCalls) * 100).toFixed(1)}%`); console.log(`Time reduction: ${((1 - timeNew / timeOld) * 100).toFixed(1)}%`); console.log( `Result: truncated=${result.wasTruncated}, tokens=${result.tokenCount}/${tokenLimit}\n`, ); expect(newCalls).toBeLessThan(oldCalls); expect(result.tokenCount).toBeLessThanOrEqual(tokenLimit); expect(newCalls).toBeLessThanOrEqual(7); }); it('should achieve at least 70% reduction with countTokens', async () => { const oldCounter = createCountTokensCounter(); const newCounter = createCountTokensCounter(); const text = createRealisticText(50000); const tokenLimit = 10000; await processTextWithTokenLimitOLD({ text, tokenLimit, tokenCountFn: oldCounter.tokenCountFn, }); Tokenizer.freeAndResetAllEncoders(); await processTextWithTokenLimit({ text, tokenLimit, tokenCountFn: newCounter.tokenCountFn, }); const oldCalls = oldCounter.getCallCount(); const newCalls = newCounter.getCallCount(); const reduction = 1 - newCalls / oldCalls; console.log( `[countTokens 50k tokens] OLD: ${oldCalls}, NEW: ${newCalls}, Reduction: ${(reduction * 100).toFixed(1)}%`, ); expect(reduction).toBeGreaterThanOrEqual(0.7); }); }); });