mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-01-14 14:38:51 +01:00
📊 refactor: Use Estimated Document Count for Meilisearch Sync (#11329)
* 🔧 refactor: use approximate number of documents to improve performance * 🔧 refactor: unit tests for approximate document count in meilisearch sync * refactor: limit percentage based on approximate total count & add one more test case
This commit is contained in:
parent
774f1f2cc2
commit
10f591ab1c
2 changed files with 144 additions and 4 deletions
|
|
@ -129,4 +129,139 @@ describe('Meilisearch Mongoose plugin', () => {
|
|||
|
||||
expect(mockAddDocuments).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
describe('estimatedDocumentCount usage in syncWithMeili', () => {
|
||||
test('syncWithMeili completes successfully with estimatedDocumentCount', async () => {
|
||||
// Clear any previous documents
|
||||
const conversationModel = createConversationModel(mongoose) as SchemaWithMeiliMethods;
|
||||
await conversationModel.deleteMany({});
|
||||
|
||||
// Create test documents
|
||||
await conversationModel.create({
|
||||
conversationId: new mongoose.Types.ObjectId(),
|
||||
user: new mongoose.Types.ObjectId(),
|
||||
title: 'Test Conversation 1',
|
||||
endpoint: EModelEndpoint.openAI,
|
||||
});
|
||||
|
||||
await conversationModel.create({
|
||||
conversationId: new mongoose.Types.ObjectId(),
|
||||
user: new mongoose.Types.ObjectId(),
|
||||
title: 'Test Conversation 2',
|
||||
endpoint: EModelEndpoint.openAI,
|
||||
});
|
||||
|
||||
// Trigger sync - should use estimatedDocumentCount internally
|
||||
await expect(conversationModel.syncWithMeili()).resolves.not.toThrow();
|
||||
|
||||
// Verify documents were processed
|
||||
expect(mockAddDocuments).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test('syncWithMeili handles empty collection correctly', async () => {
|
||||
const messageModel = createMessageModel(mongoose) as SchemaWithMeiliMethods;
|
||||
await messageModel.deleteMany({});
|
||||
|
||||
// Verify collection is empty
|
||||
const count = await messageModel.estimatedDocumentCount();
|
||||
expect(count).toBe(0);
|
||||
|
||||
// Sync should complete without error even with 0 estimated documents
|
||||
await expect(messageModel.syncWithMeili()).resolves.not.toThrow();
|
||||
});
|
||||
|
||||
test('estimatedDocumentCount returns count for non-empty collection', async () => {
|
||||
const conversationModel = createConversationModel(mongoose) as SchemaWithMeiliMethods;
|
||||
await conversationModel.deleteMany({});
|
||||
|
||||
// Create documents
|
||||
await conversationModel.create({
|
||||
conversationId: new mongoose.Types.ObjectId(),
|
||||
user: new mongoose.Types.ObjectId(),
|
||||
title: 'Test 1',
|
||||
endpoint: EModelEndpoint.openAI,
|
||||
});
|
||||
|
||||
await conversationModel.create({
|
||||
conversationId: new mongoose.Types.ObjectId(),
|
||||
user: new mongoose.Types.ObjectId(),
|
||||
title: 'Test 2',
|
||||
endpoint: EModelEndpoint.openAI,
|
||||
});
|
||||
|
||||
const estimatedCount = await conversationModel.estimatedDocumentCount();
|
||||
expect(estimatedCount).toBeGreaterThanOrEqual(2);
|
||||
});
|
||||
|
||||
test('estimatedDocumentCount is available on model', async () => {
|
||||
const messageModel = createMessageModel(mongoose) as SchemaWithMeiliMethods;
|
||||
|
||||
// Verify the method exists and is callable
|
||||
expect(typeof messageModel.estimatedDocumentCount).toBe('function');
|
||||
|
||||
// Should be able to call it
|
||||
const result = await messageModel.estimatedDocumentCount();
|
||||
expect(typeof result).toBe('number');
|
||||
expect(result).toBeGreaterThanOrEqual(0);
|
||||
});
|
||||
|
||||
test('syncWithMeili handles mix of syncable and TTL documents correctly', async () => {
|
||||
const messageModel = createMessageModel(mongoose) as SchemaWithMeiliMethods;
|
||||
await messageModel.deleteMany({});
|
||||
mockAddDocuments.mockClear();
|
||||
|
||||
// Create syncable documents (expiredAt: null)
|
||||
await messageModel.create({
|
||||
messageId: new mongoose.Types.ObjectId(),
|
||||
conversationId: new mongoose.Types.ObjectId(),
|
||||
user: new mongoose.Types.ObjectId(),
|
||||
isCreatedByUser: true,
|
||||
expiredAt: null,
|
||||
});
|
||||
|
||||
await messageModel.create({
|
||||
messageId: new mongoose.Types.ObjectId(),
|
||||
conversationId: new mongoose.Types.ObjectId(),
|
||||
user: new mongoose.Types.ObjectId(),
|
||||
isCreatedByUser: false,
|
||||
expiredAt: null,
|
||||
});
|
||||
|
||||
// Create TTL documents (expiredAt set to a date)
|
||||
await messageModel.create({
|
||||
messageId: new mongoose.Types.ObjectId(),
|
||||
conversationId: new mongoose.Types.ObjectId(),
|
||||
user: new mongoose.Types.ObjectId(),
|
||||
isCreatedByUser: true,
|
||||
expiredAt: new Date(),
|
||||
});
|
||||
|
||||
await messageModel.create({
|
||||
messageId: new mongoose.Types.ObjectId(),
|
||||
conversationId: new mongoose.Types.ObjectId(),
|
||||
user: new mongoose.Types.ObjectId(),
|
||||
isCreatedByUser: false,
|
||||
expiredAt: new Date(),
|
||||
});
|
||||
|
||||
// estimatedDocumentCount should count all documents (both syncable and TTL)
|
||||
const estimatedCount = await messageModel.estimatedDocumentCount();
|
||||
expect(estimatedCount).toBe(4);
|
||||
|
||||
// Actual syncable documents (expiredAt: null)
|
||||
const syncableCount = await messageModel.countDocuments({ expiredAt: null });
|
||||
expect(syncableCount).toBe(2);
|
||||
|
||||
// Sync should complete successfully even though estimated count is higher than processed count
|
||||
await expect(messageModel.syncWithMeili()).resolves.not.toThrow();
|
||||
|
||||
// Only syncable documents should be indexed (2 documents, not 4)
|
||||
// The mock should be called once per batch, and we have 2 documents
|
||||
expect(mockAddDocuments).toHaveBeenCalled();
|
||||
|
||||
// Verify that only 2 documents were indexed (the syncable ones)
|
||||
const indexedCount = await messageModel.countDocuments({ _meiliIndex: true });
|
||||
expect(indexedCount).toBe(2);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -189,8 +189,10 @@ const createMeiliMongooseModel = ({
|
|||
query._id = { $gt: options.resumeFromId };
|
||||
}
|
||||
|
||||
// Get total count for progress tracking
|
||||
const totalCount = await this.countDocuments(query);
|
||||
// Get approximate total count for progress tracking
|
||||
const approxTotalCount = await this.estimatedDocumentCount();
|
||||
logger.info(`[syncWithMeili] Approximate total number of documents to sync: ${approxTotalCount}`);
|
||||
|
||||
let processedCount = 0;
|
||||
|
||||
// First, handle documents that need to be removed from Meili
|
||||
|
|
@ -239,8 +241,11 @@ const createMeiliMongooseModel = ({
|
|||
updateOps = [];
|
||||
|
||||
// Log progress
|
||||
const progress = Math.round((processedCount / totalCount) * 100);
|
||||
logger.info(`[syncWithMeili] Progress: ${progress}% (${processedCount}/${totalCount})`);
|
||||
// Calculating the percentage from the approximate total count can sometimes yield more than 100%;
|
||||
// the difference is very small and acceptable for progress tracking
|
||||
const percent = Math.round((processedCount / approxTotalCount) * 100);
|
||||
const progress = Math.min(percent, 100);
|
||||
logger.info(`[syncWithMeili] Progress: ${progress}% (count: ${processedCount})`);
|
||||
|
||||
// Add delay to prevent overwhelming resources
|
||||
if (delayMs > 0) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue