// Mirror of https://github.com/danny-avila/LibreChat.git
// Synced 2025-12-17 08:50:15 +01:00
// 57 lines, 1.5 KiB, JavaScript
const tokenSplit = require('./tokenSplit');

/**
 * Jest suite for `tokenSplit`.
 *
 * Every case awaits `tokenSplit(options)` and compares the resolved array of
 * string chunks against a fixed expectation for the shared sample sentence.
 * The options exercised here are `text`, `encodingName`, `chunkSize`,
 * `chunkOverlap` and `returnSize`.
 */
describe('tokenSplit', () => {
  // Shared sample input used by every case except the empty-text one.
  const text = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam id.';

  it('returns correct text chunks with provided parameters', async () => {
    // All options given explicitly: overlapping chunks, limited to five results.
    const result = await tokenSplit({
      text,
      encodingName: 'gpt2',
      chunkSize: 2,
      chunkOverlap: 1,
      returnSize: 5,
    });

    expect(result).toEqual(['. Null', ' Nullam', 'am id', ' id.', '.']);
  });

  it('returns correct text chunks with default parameters', async () => {
    // Only `text` is supplied; the expected output is the full list of pieces.
    const result = await tokenSplit({ text });
    expect(result).toEqual([
      'Lorem',
      ' ipsum',
      ' dolor',
      ' sit',
      ' amet',
      ',',
      ' consectetur',
      ' adipiscing',
      ' elit',
      '.',
      ' Null',
      'am',
      ' id',
      '.',
    ]);
  });

  it('returns correct text chunks with specific return size', async () => {
    // The expectation matches the last two entries of the default-parameter output.
    const result = await tokenSplit({ text, returnSize: 2 });
    expect(result).toHaveLength(2);
    expect(result).toEqual([' id', '.']);
  });

  it('returns correct text chunks with specified chunk size', async () => {
    const result = await tokenSplit({ text, chunkSize: 10 });
    expect(result).toEqual([
      'Lorem ipsum dolor sit amet, consectetur adipiscing elit.',
      ' Nullam id.',
    ]);
  });

  it('returns empty array with no text', async () => {
    const result = await tokenSplit({ text: '' });
    expect(result).toEqual([]);
  });
});