LibreChat/api/db/indexSync.js

175 lines
5.7 KiB
JavaScript
Raw Normal View History

🏗️ refactor: Extract DB layers to `data-schemas` for shared use (#7650) * refactor: move model definitions and database-related methods to packages/data-schemas * ci: update tests due to new DB structure fix: disable mocking `librechat-data-provider` feat: Add schema exports to data-schemas package - Introduced a new schema module that exports various schemas including action, agent, and user schemas. - Updated index.ts to include the new schema exports for better modularity and organization. ci: fix appleStrategy tests fix: Agent.spec.js ci: refactor handleTools tests to use MongoMemoryServer for in-memory database fix: getLogStores imports ci: update banViolation tests to use MongoMemoryServer and improve session mocking test: refactor samlStrategy tests to improve mock configurations and user handling ci: fix crypto mock in handleText tests for improved accuracy ci: refactor spendTokens tests to improve model imports and setup ci: refactor Message model tests to use MongoMemoryServer and improve database interactions * refactor: streamline IMessage interface and move feedback properties to types/message.ts * refactor: use exported initializeRoles from `data-schemas`, remove api workspace version (this serves as an example of future migrations that still need to happen) * refactor: update model imports to use destructuring from `~/db/models` for consistency and clarity * refactor: remove unused mongoose imports from model files for cleaner code * refactor: remove unused mongoose imports from Share, Prompt, and Transaction model files for cleaner code * refactor: remove unused import in Transaction model for cleaner code * ci: update deploy workflow to reference new Docker Dev Branch Images Build and add new workflow for building Docker images on dev branch * chore: cleanup imports
2025-05-30 22:18:13 -04:00
const mongoose = require('mongoose');
const { MeiliSearch } = require('meilisearch');
🏗️ refactor: Extract DB layers to `data-schemas` for shared use (#7650) * refactor: move model definitions and database-related methods to packages/data-schemas * ci: update tests due to new DB structure fix: disable mocking `librechat-data-provider` feat: Add schema exports to data-schemas package - Introduced a new schema module that exports various schemas including action, agent, and user schemas. - Updated index.ts to include the new schema exports for better modularity and organization. ci: fix appleStrategy tests fix: Agent.spec.js ci: refactor handleTools tests to use MongoMemoryServer for in-memory database fix: getLogStores imports ci: update banViolation tests to use MongoMemoryServer and improve session mocking test: refactor samlStrategy tests to improve mock configurations and user handling ci: fix crypto mock in handleText tests for improved accuracy ci: refactor spendTokens tests to improve model imports and setup ci: refactor Message model tests to use MongoMemoryServer and improve database interactions * refactor: streamline IMessage interface and move feedback properties to types/message.ts * refactor: use exported initializeRoles from `data-schemas`, remove api workspace version (this serves as an example of future migrations that still need to happen) * refactor: update model imports to use destructuring from `~/db/models` for consistency and clarity * refactor: remove unused mongoose imports from model files for cleaner code * refactor: remove unused mongoose imports from Share, Prompt, and Transaction model files for cleaner code * refactor: remove unused import in Transaction model for cleaner code * ci: update deploy workflow to reference new Docker Dev Branch Images Build and add new workflow for building Docker images on dev branch * chore: cleanup imports
2025-05-30 22:18:13 -04:00
const { logger } = require('@librechat/data-schemas');
const { FlowStateManager } = require('@librechat/api');
const { CacheKeys } = require('librechat-data-provider');
🏗️ refactor: Extract DB layers to `data-schemas` for shared use (#7650) * refactor: move model definitions and database-related methods to packages/data-schemas * ci: update tests due to new DB structure fix: disable mocking `librechat-data-provider` feat: Add schema exports to data-schemas package - Introduced a new schema module that exports various schemas including action, agent, and user schemas. - Updated index.ts to include the new schema exports for better modularity and organization. ci: fix appleStrategy tests fix: Agent.spec.js ci: refactor handleTools tests to use MongoMemoryServer for in-memory database fix: getLogStores imports ci: update banViolation tests to use MongoMemoryServer and improve session mocking test: refactor samlStrategy tests to improve mock configurations and user handling ci: fix crypto mock in handleText tests for improved accuracy ci: refactor spendTokens tests to improve model imports and setup ci: refactor Message model tests to use MongoMemoryServer and improve database interactions * refactor: streamline IMessage interface and move feedback properties to types/message.ts * refactor: use exported initializeRoles from `data-schemas`, remove api workspace version (this serves as an example of future migrations that still need to happen) * refactor: update model imports to use destructuring from `~/db/models` for consistency and clarity * refactor: remove unused mongoose imports from model files for cleaner code * refactor: remove unused mongoose imports from Share, Prompt, and Transaction model files for cleaner code * refactor: remove unused import in Transaction model for cleaner code * ci: update deploy workflow to reference new Docker Dev Branch Images Build and add new workflow for building Docker images on dev branch * chore: cleanup imports
2025-05-30 22:18:13 -04:00
const { isEnabled } = require('~/server/utils');
const { getLogStores } = require('~/cache');
🏗️ refactor: Extract DB layers to `data-schemas` for shared use (#7650) * refactor: move model definitions and database-related methods to packages/data-schemas * ci: update tests due to new DB structure fix: disable mocking `librechat-data-provider` feat: Add schema exports to data-schemas package - Introduced a new schema module that exports various schemas including action, agent, and user schemas. - Updated index.ts to include the new schema exports for better modularity and organization. ci: fix appleStrategy tests fix: Agent.spec.js ci: refactor handleTools tests to use MongoMemoryServer for in-memory database fix: getLogStores imports ci: update banViolation tests to use MongoMemoryServer and improve session mocking test: refactor samlStrategy tests to improve mock configurations and user handling ci: fix crypto mock in handleText tests for improved accuracy ci: refactor spendTokens tests to improve model imports and setup ci: refactor Message model tests to use MongoMemoryServer and improve database interactions * refactor: streamline IMessage interface and move feedback properties to types/message.ts * refactor: use exported initializeRoles from `data-schemas`, remove api workspace version (this serves as an example of future migrations that still need to happen) * refactor: update model imports to use destructuring from `~/db/models` for consistency and clarity * refactor: remove unused mongoose imports from model files for cleaner code * refactor: remove unused mongoose imports from Share, Prompt, and Transaction model files for cleaner code * refactor: remove unused import in Transaction model for cleaner code * ci: update deploy workflow to reference new Docker Dev Branch Images Build and add new workflow for building Docker images on dev branch * chore: cleanup imports
2025-05-30 22:18:13 -04:00
const Conversation = mongoose.models.Conversation;
const Message = mongoose.models.Message;
const searchEnabled = isEnabled(process.env.SEARCH);
const indexingDisabled = isEnabled(process.env.MEILI_NO_SYNC);
let currentTimeout = null;
📥 feat: Import Conversations from LibreChat, ChatGPT, Chatbot UI (#2355) * Basic implementation of ChatGPT conversation import * remove debug code * Handle citations * Fix updatedAt in import * update default model * Use job scheduler to handle import requests * import job status endpoint * Add wrapper around Agenda * Rate limits for import endpoint * rename import api path * Batch save import to mongo * Improve naming * Add documenting comments * Test for importers * Change button for importing conversations * Frontend changes * Import job status endpoint * Import endpoint response * Add translations to new phrases * Fix conversations refreshing * cleanup unused functions * set timeout for import job status polling * Add documentation * get extra spaces back * Improve error message * Fix translation files after merge * fix translation files 2 * Add zh translation for import functionality * Sync mailisearch index after import * chore: add dummy uri for jest tests, as MONGO_URI should only be real for E2E tests * docs: fix links * docs: fix conversationsImport section * fix: user role issue for librechat imports * refactor: import conversations from json - organize imports - add additional jsdocs - use multer with diskStorage to avoid loading file into memory outside of job - use filepath instead of loading data string for imports - replace console logs and some logger.info() with logger.debug - only use multer for import route * fix: undefined metadata edge case and replace ChatGtp -> ChatGpt * Refactor importChatGptConvo function to handle undefined metadata edge case and replace ChatGtp with ChatGpt * fix: chatgpt importer * feat: maintain tree relationship for librechat messages * chore: use enum * refactor: saveMessage to use single object arg, replace console logs, add userId to log message * chore: additional comment * chore: multer edge case * feat: first pass, maintain tree relationship * chore: organize * chore: remove log * ci: add heirarchy test for chatgpt * ci: test maintaining of heirarchy for librechat * wip: allow non-text content type messages * refactor: import content part object json string * refactor: more content types to format * chore: consolidate messageText formatting * docs: update on changes, bump data-provider/config versions, update readme * refactor(indexSync): singleton pattern for MeiliSearchClient * refactor: debug log after batch is done * chore: add back indexSync error handling --------- Co-authored-by: jakubmieszczak <jakub.mieszczak@zendesk.com> Co-authored-by: Danny Avila <danny@librechat.ai>
2024-05-02 08:48:26 +02:00
class MeiliSearchClient {
static instance = null;
static getInstance() {
if (!MeiliSearchClient.instance) {
if (!process.env.MEILI_HOST || !process.env.MEILI_MASTER_KEY) {
throw new Error('Meilisearch configuration is missing.');
}
MeiliSearchClient.instance = new MeiliSearch({
host: process.env.MEILI_HOST,
apiKey: process.env.MEILI_MASTER_KEY,
});
}
return MeiliSearchClient.instance;
}
}
/**
* Performs the actual sync operations for messages and conversations
*/
async function performSync() {
const client = MeiliSearchClient.getInstance();
const { status } = await client.health();
if (status !== 'available') {
throw new Error('Meilisearch not available');
}
if (indexingDisabled === true) {
logger.info('[indexSync] Indexing is disabled, skipping...');
return { messagesSync: false, convosSync: false };
}
let messagesSync = false;
let convosSync = false;
// Check if we need to sync messages
const messageProgress = await Message.getSyncProgress();
if (!messageProgress.isComplete) {
logger.info(
`[indexSync] Messages need syncing: ${messageProgress.totalProcessed}/${messageProgress.totalDocuments} indexed`,
);
// Check if we should do a full sync or incremental
const messageCount = await Message.countDocuments();
const messagesIndexed = messageProgress.totalProcessed;
const syncThreshold = parseInt(process.env.MEILI_SYNC_THRESHOLD || '1000', 10);
if (messageCount - messagesIndexed > syncThreshold) {
logger.info('[indexSync] Starting full message sync due to large difference');
await Message.syncWithMeili();
messagesSync = true;
} else if (messageCount !== messagesIndexed) {
logger.warn('[indexSync] Messages out of sync, performing incremental sync');
await Message.syncWithMeili();
messagesSync = true;
}
} else {
logger.info(
`[indexSync] Messages are fully synced: ${messageProgress.totalProcessed}/${messageProgress.totalDocuments}`,
);
}
// Check if we need to sync conversations
const convoProgress = await Conversation.getSyncProgress();
if (!convoProgress.isComplete) {
logger.info(
`[indexSync] Conversations need syncing: ${convoProgress.totalProcessed}/${convoProgress.totalDocuments} indexed`,
);
const convoCount = await Conversation.countDocuments();
const convosIndexed = convoProgress.totalProcessed;
const syncThreshold = parseInt(process.env.MEILI_SYNC_THRESHOLD || '1000', 10);
if (convoCount - convosIndexed > syncThreshold) {
logger.info('[indexSync] Starting full conversation sync due to large difference');
await Conversation.syncWithMeili();
convosSync = true;
} else if (convoCount !== convosIndexed) {
logger.warn('[indexSync] Convos out of sync, performing incremental sync');
await Conversation.syncWithMeili();
convosSync = true;
}
} else {
logger.info(
`[indexSync] Conversations are fully synced: ${convoProgress.totalProcessed}/${convoProgress.totalDocuments}`,
);
}
return { messagesSync, convosSync };
}
/**
* Main index sync function that uses FlowStateManager to prevent concurrent execution
*/
async function indexSync() {
if (!searchEnabled) {
return;
}
logger.info('[indexSync] Starting index synchronization check...');
try {
// Get or create FlowStateManager instance
const flowsCache = getLogStores(CacheKeys.FLOWS);
if (!flowsCache) {
logger.warn('[indexSync] Flows cache not available, falling back to direct sync');
return await performSync();
}
const flowManager = new FlowStateManager(flowsCache, {
ttl: 60000 * 10, // 10 minutes TTL for sync operations
});
// Use a unique flow ID for the sync operation
const flowId = 'meili-index-sync';
const flowType = 'MEILI_SYNC';
// This will only execute the handler if no other instance is running the sync
const result = await flowManager.createFlowWithHandler(flowId, flowType, performSync);
if (result.messagesSync || result.convosSync) {
logger.info('[indexSync] Sync completed successfully');
} else {
logger.debug('[indexSync] No sync was needed');
}
return result;
} catch (err) {
if (err.message.includes('flow already exists')) {
logger.info('[indexSync] Sync already running on another instance');
return;
}
2023-03-22 17:15:32 -04:00
if (err.message.includes('not found')) {
logger.debug('[indexSync] Creating indices...');
2023-03-23 13:16:07 -04:00
currentTimeout = setTimeout(async () => {
2023-03-22 17:15:32 -04:00
try {
await Message.syncWithMeili();
await Conversation.syncWithMeili();
} catch (err) {
logger.error('[indexSync] Trouble creating indices, try restarting the server.', err);
2023-03-22 17:15:32 -04:00
}
}, 750);
} else if (err.message.includes('Meilisearch not configured')) {
logger.info('[indexSync] Meilisearch not configured, search will be disabled.');
2023-03-22 17:15:32 -04:00
} else {
logger.error('[indexSync] error', err);
2023-03-22 17:15:32 -04:00
}
}
}
2023-03-23 13:16:07 -04:00
process.on('exit', () => {
logger.debug('[indexSync] Clearing sync timeouts before exiting...');
2023-03-23 13:16:07 -04:00
clearTimeout(currentTimeout);
});
module.exports = indexSync;