From 3398f6a17aafe7ee961d3cc5323546d2df8b961c Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Fri, 13 Feb 2026 02:14:34 -0500 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=98=20feat:=20FerretDB=20Compatibility?= =?UTF-8?q?=20(#11769)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: replace unsupported MongoDB aggregation operators for FerretDB compatibility Replace $lookup, $unwind, $sample, $replaceRoot, and $addFields aggregation stages which are unsupported on FerretDB v2.x (postgres-documentdb backend). - Prompt.js: Replace $lookup/$unwind/$project pipelines with find().select().lean() + attachProductionPrompts() batch helper. Replace $group/$replaceRoot/$sample in getRandomPromptGroups with distinct() + Fisher-Yates shuffle. - Agent/Prompt migration scripts: Replace $lookup anti-join pattern with distinct() + $nin two-step queries for finding un-migrated resources. All replacement patterns verified against FerretDB v2.7.0. Co-authored-by: Cursor * fix: use $pullAll for simple array removals, fix memberIds type mismatches Replace $pull with $pullAll for exact-value scalar array removals. Both operators work on MongoDB and FerretDB, but $pullAll is more explicit for exact matching (no condition expressions). Fix critical type mismatch bugs where ObjectId values were used against String[] memberIds arrays in Group queries: - config/delete-user.js: use string uid instead of ObjectId user._id - e2e/setup/cleanupUser.ts: convert userId.toString() before query Harden PermissionService.bulkUpdateResourcePermissions abort handling to prevent crash when abortTransaction is called after commitTransaction. All changes verified against FerretDB v2.7.0 and MongoDB Memory Server. Co-authored-by: Cursor * fix: harden transaction support probe for FerretDB compatibility Commit the transaction before aborting in supportsTransactions probe, and wrap abortTransaction in try-catch to prevent crashes when abort is called after a successful commit (observed behavior on FerretDB). Co-authored-by: Cursor * feat: add FerretDB compatibility test suite, retry utilities, and CI config Add comprehensive FerretDB integration test suite covering: - $pullAll scalar array operations - $pull with subdocument conditions - $lookup replacement (find + manual join) - $sample replacement (distinct + Fisher-Yates) - $bit and $bitsAllSet operations - Migration anti-join pattern - Multi-tenancy (useDb, scaling, write amplification) - Sharding proof-of-concept - Production operations (backup/restore, schema migration, deadlock retry) Add production retryWithBackoff utility for deadlock recovery during concurrent index creation on FerretDB/DocumentDB backends. Add UserController.spec.js tests for deleteUserController (runs in CI). Configure jest and eslint to isolate FerretDB tests from CI pipelines: - packages/data-schemas/jest.config.mjs: ignore misc/ directory - eslint.config.mjs: ignore packages/data-schemas/misc/ Include Docker Compose config for local FerretDB v2.7 + postgres-documentdb, dedicated jest/tsconfig for the test files, and multi-tenancy findings doc. Co-authored-by: Cursor * style: brace formatting in aclEntry.ts modifyPermissionBits Co-authored-by: Cursor * refactor: reorganize retry utilities and update imports - Moved retryWithBackoff utility to a new file `retry.ts` for better structure. - Updated imports in `orgOperations.ferretdb.spec.ts` to reflect the new location of retry utilities. 
- Removed old import statement for retryWithBackoff from index.ts to streamline exports. * test: add $pullAll coverage for ConversationTag and PermissionService Add integration tests for deleteConversationTag verifying $pullAll removes tags from conversations correctly, and for syncUserEntraGroupMemberships verifying $pullAll removes user from non-matching Entra groups while preserving local group membership. --------- Co-authored-by: Cursor --- api/models/Agent.js | 9 +- api/models/ConversationTag.js | 2 +- api/models/ConversationTag.spec.js | 114 +++ api/models/Project.js | 8 +- api/models/Prompt.js | 221 ++---- api/server/controllers/UserController.js | 6 +- api/server/controllers/UserController.spec.js | 208 ++++++ api/server/controllers/agents/v1.spec.js | 1 - api/server/services/PermissionService.js | 12 +- api/server/services/PermissionService.spec.js | 136 ++++ config/delete-user.js | 2 +- config/migrate-agent-permissions.js | 55 +- config/migrate-prompt-permissions.js | 55 +- e2e/setup/cleanupUser.ts | 3 +- eslint.config.mjs | 1 + packages/api/src/agents/migration.ts | 56 +- packages/api/src/prompts/migration.ts | 56 +- packages/data-schemas/jest.config.mjs | 1 + .../misc/ferretdb/aclBitops.ferretdb.spec.ts | 468 ++++++++++++ .../misc/ferretdb/docker-compose.ferretdb.yml | 21 + .../ferretdb/ferretdb-multitenancy-plan.md | 204 ++++++ .../misc/ferretdb/jest.ferretdb.config.mjs | 18 + .../migrationAntiJoin.ferretdb.spec.ts | 362 ++++++++++ .../ferretdb/multiTenancy.ferretdb.spec.ts | 649 +++++++++++++++++ .../ferretdb/orgOperations.ferretdb.spec.ts | 675 ++++++++++++++++++ .../ferretdb/promptLookup.ferretdb.spec.ts | 353 +++++++++ .../misc/ferretdb/pullAll.ferretdb.spec.ts | 297 ++++++++ .../ferretdb/pullSubdocument.ferretdb.spec.ts | 199 ++++++ .../ferretdb/randomPrompts.ferretdb.spec.ts | 210 ++++++ .../misc/ferretdb/sharding.ferretdb.spec.ts | 522 ++++++++++++++ .../data-schemas/misc/ferretdb/tsconfig.json | 14 + packages/data-schemas/src/methods/aclEntry.ts | 4 +- .../data-schemas/src/methods/userGroup.ts | 2 +- packages/data-schemas/src/utils/retry.ts | 122 ++++ .../data-schemas/src/utils/transactions.ts | 8 +- 35 files changed, 4727 insertions(+), 347 deletions(-) create mode 100644 api/models/ConversationTag.spec.js create mode 100644 api/server/controllers/UserController.spec.js create mode 100644 packages/data-schemas/misc/ferretdb/aclBitops.ferretdb.spec.ts create mode 100644 packages/data-schemas/misc/ferretdb/docker-compose.ferretdb.yml create mode 100644 packages/data-schemas/misc/ferretdb/ferretdb-multitenancy-plan.md create mode 100644 packages/data-schemas/misc/ferretdb/jest.ferretdb.config.mjs create mode 100644 packages/data-schemas/misc/ferretdb/migrationAntiJoin.ferretdb.spec.ts create mode 100644 packages/data-schemas/misc/ferretdb/multiTenancy.ferretdb.spec.ts create mode 100644 packages/data-schemas/misc/ferretdb/orgOperations.ferretdb.spec.ts create mode 100644 packages/data-schemas/misc/ferretdb/promptLookup.ferretdb.spec.ts create mode 100644 packages/data-schemas/misc/ferretdb/pullAll.ferretdb.spec.ts create mode 100644 packages/data-schemas/misc/ferretdb/pullSubdocument.ferretdb.spec.ts create mode 100644 packages/data-schemas/misc/ferretdb/randomPrompts.ferretdb.spec.ts create mode 100644 packages/data-schemas/misc/ferretdb/sharding.ferretdb.spec.ts create mode 100644 packages/data-schemas/misc/ferretdb/tsconfig.json create mode 100644 packages/data-schemas/src/utils/retry.ts diff --git a/api/models/Agent.js b/api/models/Agent.js index 
663285183a..2467af25ac 100644 --- a/api/models/Agent.js +++ b/api/models/Agent.js @@ -546,16 +546,15 @@ const removeAgentResourceFiles = async ({ agent_id, files }) => { return acc; }, {}); - // Step 1: Atomically remove file IDs using $pull - const pullOps = {}; + const pullAllOps = {}; const resourcesToCheck = new Set(); for (const [resource, fileIds] of Object.entries(filesByResource)) { const fileIdsPath = `tool_resources.${resource}.file_ids`; - pullOps[fileIdsPath] = { $in: fileIds }; + pullAllOps[fileIdsPath] = fileIds; resourcesToCheck.add(resource); } - const updatePullData = { $pull: pullOps }; + const updatePullData = { $pullAll: pullAllOps }; const agentAfterPull = await Agent.findOneAndUpdate(searchParameter, updatePullData, { new: true, }).lean(); @@ -775,7 +774,7 @@ const updateAgentProjects = async ({ user, agentId, projectIds, removeProjectIds for (const projectId of removeProjectIds) { await removeAgentIdsFromProject(projectId, [agentId]); } - updateOps.$pull = { projectIds: { $in: removeProjectIds } }; + updateOps.$pullAll = { projectIds: removeProjectIds }; } if (projectIds && projectIds.length > 0) { diff --git a/api/models/ConversationTag.js b/api/models/ConversationTag.js index 47a6c2bbf5..99d0608a66 100644 --- a/api/models/ConversationTag.js +++ b/api/models/ConversationTag.js @@ -165,7 +165,7 @@ const deleteConversationTag = async (user, tag) => { return null; } - await Conversation.updateMany({ user, tags: tag }, { $pull: { tags: tag } }); + await Conversation.updateMany({ user, tags: tag }, { $pullAll: { tags: [tag] } }); await ConversationTag.updateMany( { user, position: { $gt: deletedTag.position } }, diff --git a/api/models/ConversationTag.spec.js b/api/models/ConversationTag.spec.js new file mode 100644 index 0000000000..bc7da919e1 --- /dev/null +++ b/api/models/ConversationTag.spec.js @@ -0,0 +1,114 @@ +const mongoose = require('mongoose'); +const { MongoMemoryServer } = require('mongodb-memory-server'); +const { ConversationTag, Conversation } = require('~/db/models'); +const { deleteConversationTag } = require('./ConversationTag'); + +let mongoServer; + +beforeAll(async () => { + mongoServer = await MongoMemoryServer.create(); + await mongoose.connect(mongoServer.getUri()); +}); + +afterAll(async () => { + await mongoose.disconnect(); + await mongoServer.stop(); +}); + +afterEach(async () => { + await ConversationTag.deleteMany({}); + await Conversation.deleteMany({}); +}); + +describe('ConversationTag model - $pullAll operations', () => { + const userId = new mongoose.Types.ObjectId().toString(); + + describe('deleteConversationTag', () => { + it('should remove the tag from all conversations that have it', async () => { + await ConversationTag.create({ tag: 'work', user: userId, position: 1 }); + + await Conversation.create([ + { conversationId: 'conv1', user: userId, endpoint: 'openAI', tags: ['work', 'important'] }, + { conversationId: 'conv2', user: userId, endpoint: 'openAI', tags: ['work'] }, + { conversationId: 'conv3', user: userId, endpoint: 'openAI', tags: ['personal'] }, + ]); + + await deleteConversationTag(userId, 'work'); + + const convos = await Conversation.find({ user: userId }).sort({ conversationId: 1 }).lean(); + expect(convos[0].tags).toEqual(['important']); + expect(convos[1].tags).toEqual([]); + expect(convos[2].tags).toEqual(['personal']); + }); + + it('should delete the tag document itself', async () => { + await ConversationTag.create({ tag: 'temp', user: userId, position: 1 }); + + const result = await 
deleteConversationTag(userId, 'temp'); + + expect(result).toBeDefined(); + expect(result.tag).toBe('temp'); + + const remaining = await ConversationTag.find({ user: userId }).lean(); + expect(remaining).toHaveLength(0); + }); + + it('should return null when the tag does not exist', async () => { + const result = await deleteConversationTag(userId, 'nonexistent'); + expect(result).toBeNull(); + }); + + it('should adjust positions of tags after the deleted one', async () => { + await ConversationTag.create([ + { tag: 'first', user: userId, position: 1 }, + { tag: 'second', user: userId, position: 2 }, + { tag: 'third', user: userId, position: 3 }, + ]); + + await deleteConversationTag(userId, 'first'); + + const tags = await ConversationTag.find({ user: userId }).sort({ position: 1 }).lean(); + expect(tags).toHaveLength(2); + expect(tags[0].tag).toBe('second'); + expect(tags[0].position).toBe(1); + expect(tags[1].tag).toBe('third'); + expect(tags[1].position).toBe(2); + }); + + it('should not affect conversations of other users', async () => { + const otherUser = new mongoose.Types.ObjectId().toString(); + + await ConversationTag.create({ tag: 'shared-name', user: userId, position: 1 }); + await ConversationTag.create({ tag: 'shared-name', user: otherUser, position: 1 }); + + await Conversation.create([ + { conversationId: 'mine', user: userId, endpoint: 'openAI', tags: ['shared-name'] }, + { conversationId: 'theirs', user: otherUser, endpoint: 'openAI', tags: ['shared-name'] }, + ]); + + await deleteConversationTag(userId, 'shared-name'); + + const myConvo = await Conversation.findOne({ conversationId: 'mine' }).lean(); + const theirConvo = await Conversation.findOne({ conversationId: 'theirs' }).lean(); + + expect(myConvo.tags).toEqual([]); + expect(theirConvo.tags).toEqual(['shared-name']); + }); + + it('should handle duplicate tags in conversations correctly', async () => { + await ConversationTag.create({ tag: 'dup', user: userId, position: 1 }); + + const conv = await Conversation.create({ + conversationId: 'conv-dup', + user: userId, + endpoint: 'openAI', + tags: ['dup', 'other', 'dup'], + }); + + await deleteConversationTag(userId, 'dup'); + + const updated = await Conversation.findById(conv._id).lean(); + expect(updated.tags).toEqual(['other']); + }); + }); +}); diff --git a/api/models/Project.js b/api/models/Project.js index 8fd1e556f9..dc92348b54 100644 --- a/api/models/Project.js +++ b/api/models/Project.js @@ -64,7 +64,7 @@ const addGroupIdsToProject = async function (projectId, promptGroupIds) { const removeGroupIdsFromProject = async function (projectId, promptGroupIds) { return await Project.findByIdAndUpdate( projectId, - { $pull: { promptGroupIds: { $in: promptGroupIds } } }, + { $pullAll: { promptGroupIds: promptGroupIds } }, { new: true }, ); }; @@ -76,7 +76,7 @@ const removeGroupIdsFromProject = async function (projectId, promptGroupIds) { * @returns {Promise} */ const removeGroupFromAllProjects = async (promptGroupId) => { - await Project.updateMany({}, { $pull: { promptGroupIds: promptGroupId } }); + await Project.updateMany({}, { $pullAll: { promptGroupIds: [promptGroupId] } }); }; /** @@ -104,7 +104,7 @@ const addAgentIdsToProject = async function (projectId, agentIds) { const removeAgentIdsFromProject = async function (projectId, agentIds) { return await Project.findByIdAndUpdate( projectId, - { $pull: { agentIds: { $in: agentIds } } }, + { $pullAll: { agentIds: agentIds } }, { new: true }, ); }; @@ -116,7 +116,7 @@ const removeAgentIdsFromProject = async function 
(projectId, agentIds) { * @returns {Promise} */ const removeAgentFromAllProjects = async (agentId) => { - await Project.updateMany({}, { $pull: { agentIds: agentId } }); + await Project.updateMany({}, { $pullAll: { agentIds: [agentId] } }); }; module.exports = { diff --git a/api/models/Prompt.js b/api/models/Prompt.js index bde911b23a..83e82c5be6 100644 --- a/api/models/Prompt.js +++ b/api/models/Prompt.js @@ -17,83 +17,25 @@ const { removeAllPermissions } = require('~/server/services/PermissionService'); const { PromptGroup, Prompt, AclEntry } = require('~/db/models'); /** - * Create a pipeline for the aggregation to get prompt groups - * @param {Object} query - * @param {number} skip - * @param {number} limit - * @returns {[Object]} - The pipeline for the aggregation + * Batch-fetches production prompts for an array of prompt groups + * and attaches them as `productionPrompt` field. + * Replaces $lookup aggregation for FerretDB compatibility. */ -const createGroupPipeline = (query, skip, limit) => { - return [ - { $match: query }, - { $sort: { createdAt: -1 } }, - { $skip: skip }, - { $limit: limit }, - { - $lookup: { - from: 'prompts', - localField: 'productionId', - foreignField: '_id', - as: 'productionPrompt', - }, - }, - { $unwind: { path: '$productionPrompt', preserveNullAndEmptyArrays: true } }, - { - $project: { - name: 1, - numberOfGenerations: 1, - oneliner: 1, - category: 1, - projectIds: 1, - productionId: 1, - author: 1, - authorName: 1, - createdAt: 1, - updatedAt: 1, - 'productionPrompt.prompt': 1, - // 'productionPrompt._id': 1, - // 'productionPrompt.type': 1, - }, - }, - ]; -}; +const attachProductionPrompts = async (groups) => { + const uniqueIds = [...new Set(groups.map((g) => g.productionId?.toString()).filter(Boolean))]; + if (uniqueIds.length === 0) { + return groups.map((g) => ({ ...g, productionPrompt: null })); + } -/** - * Create a pipeline for the aggregation to get all prompt groups - * @param {Object} query - * @param {Partial} $project - * @returns {[Object]} - The pipeline for the aggregation - */ -const createAllGroupsPipeline = ( - query, - $project = { - name: 1, - oneliner: 1, - category: 1, - author: 1, - authorName: 1, - createdAt: 1, - updatedAt: 1, - command: 1, - 'productionPrompt.prompt': 1, - }, -) => { - return [ - { $match: query }, - { $sort: { createdAt: -1 } }, - { - $lookup: { - from: 'prompts', - localField: 'productionId', - foreignField: '_id', - as: 'productionPrompt', - }, - }, - { $unwind: { path: '$productionPrompt', preserveNullAndEmptyArrays: true } }, - { - $project, - }, - ]; + const prompts = await Prompt.find({ _id: { $in: uniqueIds } }) + .select('prompt') + .lean(); + const promptMap = new Map(prompts.map((p) => [p._id.toString(), p])); + + return groups.map((g) => ({ + ...g, + productionPrompt: g.productionId ? (promptMap.get(g.productionId.toString()) ?? 
null) : null, + })); }; /** @@ -134,8 +76,11 @@ const getAllPromptGroups = async (req, filter) => { } } - const promptGroupsPipeline = createAllGroupsPipeline(combinedQuery); - return await PromptGroup.aggregate(promptGroupsPipeline).exec(); + const groups = await PromptGroup.find(combinedQuery) + .sort({ createdAt: -1 }) + .select('name oneliner category author authorName createdAt updatedAt command productionId') + .lean(); + return await attachProductionPrompts(groups); } catch (error) { console.error('Error getting all prompt groups', error); return { message: 'Error getting all prompt groups' }; @@ -175,7 +120,6 @@ const getPromptGroups = async (req, filter) => { let combinedQuery = query; if (searchShared) { - // const projects = req.user.projects || []; // TODO: handle multiple projects const project = await getProjectByName(Constants.GLOBAL_PROJECT_NAME, 'promptGroupIds'); if (project && project.promptGroupIds && project.promptGroupIds.length > 0) { const projectQuery = { _id: { $in: project.promptGroupIds }, ...query }; @@ -187,17 +131,19 @@ const getPromptGroups = async (req, filter) => { const skip = (validatedPageNumber - 1) * validatedPageSize; const limit = validatedPageSize; - const promptGroupsPipeline = createGroupPipeline(combinedQuery, skip, limit); - const totalPromptGroupsPipeline = [{ $match: combinedQuery }, { $count: 'total' }]; - - const [promptGroupsResults, totalPromptGroupsResults] = await Promise.all([ - PromptGroup.aggregate(promptGroupsPipeline).exec(), - PromptGroup.aggregate(totalPromptGroupsPipeline).exec(), + const [groups, totalPromptGroups] = await Promise.all([ + PromptGroup.find(combinedQuery) + .sort({ createdAt: -1 }) + .skip(skip) + .limit(limit) + .select( + 'name numberOfGenerations oneliner category projectIds productionId author authorName createdAt updatedAt', + ) + .lean(), + PromptGroup.countDocuments(combinedQuery), ]); - const promptGroups = promptGroupsResults; - const totalPromptGroups = - totalPromptGroupsResults.length > 0 ? totalPromptGroupsResults[0].total : 0; + const promptGroups = await attachProductionPrompts(groups); return { promptGroups, @@ -265,10 +211,8 @@ async function getListPromptGroupsByAccess({ const isPaginated = limit !== null && limit !== undefined; const normalizedLimit = isPaginated ? 
Math.min(Math.max(1, parseInt(limit) || 20), 100) : null; - // Build base query combining ACL accessible prompt groups with other filters const baseQuery = { ...otherParams, _id: { $in: accessibleIds } }; - // Add cursor condition if (after && typeof after === 'string' && after !== 'undefined' && after !== 'null') { try { const cursor = JSON.parse(Buffer.from(after, 'base64').toString('utf8')); @@ -281,10 +225,8 @@ async function getListPromptGroupsByAccess({ ], }; - // Merge cursor condition with base query if (Object.keys(baseQuery).length > 0) { baseQuery.$and = [{ ...baseQuery }, cursorCondition]; - // Remove the original conditions from baseQuery to avoid duplication Object.keys(baseQuery).forEach((key) => { if (key !== '$and') delete baseQuery[key]; }); @@ -296,43 +238,18 @@ async function getListPromptGroupsByAccess({ } } - // Build aggregation pipeline - const pipeline = [{ $match: baseQuery }, { $sort: { updatedAt: -1, _id: 1 } }]; + const findQuery = PromptGroup.find(baseQuery) + .sort({ updatedAt: -1, _id: 1 }) + .select( + 'name numberOfGenerations oneliner category projectIds productionId author authorName createdAt updatedAt', + ); - // Only apply limit if pagination is requested if (isPaginated) { - pipeline.push({ $limit: normalizedLimit + 1 }); + findQuery.limit(normalizedLimit + 1); } - // Add lookup for production prompt - pipeline.push( - { - $lookup: { - from: 'prompts', - localField: 'productionId', - foreignField: '_id', - as: 'productionPrompt', - }, - }, - { $unwind: { path: '$productionPrompt', preserveNullAndEmptyArrays: true } }, - { - $project: { - name: 1, - numberOfGenerations: 1, - oneliner: 1, - category: 1, - projectIds: 1, - productionId: 1, - author: 1, - authorName: 1, - createdAt: 1, - updatedAt: 1, - 'productionPrompt.prompt': 1, - }, - }, - ); - - const promptGroups = await PromptGroup.aggregate(pipeline).exec(); + const groups = await findQuery.lean(); + const promptGroups = await attachProductionPrompts(groups); const hasMore = isPaginated ? promptGroups.length > normalizedLimit : false; const data = (isPaginated ? 
promptGroups.slice(0, normalizedLimit) : promptGroups).map( @@ -344,7 +261,6 @@ async function getListPromptGroupsByAccess({ }, ); - // Generate next cursor only if paginated let nextCursor = null; if (isPaginated && hasMore && data.length > 0) { const lastGroup = promptGroups[normalizedLimit - 1]; @@ -477,32 +393,33 @@ module.exports = { */ getRandomPromptGroups: async (filter) => { try { - const result = await PromptGroup.aggregate([ - { - $match: { - category: { $ne: '' }, - }, - }, - { - $group: { - _id: '$category', - promptGroup: { $first: '$$ROOT' }, - }, - }, - { - $replaceRoot: { newRoot: '$promptGroup' }, - }, - { - $sample: { size: +filter.limit + +filter.skip }, - }, - { - $skip: +filter.skip, - }, - { - $limit: +filter.limit, - }, - ]); - return { prompts: result }; + const categories = await PromptGroup.distinct('category', { category: { $ne: '' } }); + + for (let i = categories.length - 1; i > 0; i--) { + const j = Math.floor(Math.random() * (i + 1)); + [categories[i], categories[j]] = [categories[j], categories[i]]; + } + + const skip = +filter.skip; + const limit = +filter.limit; + const selectedCategories = categories.slice(skip, skip + limit); + + if (selectedCategories.length === 0) { + return { prompts: [] }; + } + + const groups = await PromptGroup.find({ category: { $in: selectedCategories } }).lean(); + + const groupByCategory = new Map(); + for (const group of groups) { + if (!groupByCategory.has(group.category)) { + groupByCategory.set(group.category, group); + } + } + + const prompts = selectedCategories.map((cat) => groupByCategory.get(cat)).filter(Boolean); + + return { prompts }; } catch (error) { logger.error('Error getting prompt groups', error); return { message: 'Error getting prompt groups' }; @@ -635,7 +552,7 @@ module.exports = { await removeGroupIdsFromProject(projectId, [filter._id]); } - updateOps.$pull = { projectIds: { $in: data.removeProjectIds } }; + updateOps.$pullAll = { projectIds: data.removeProjectIds }; delete data.removeProjectIds; } diff --git a/api/server/controllers/UserController.js b/api/server/controllers/UserController.js index 7a9dd8125e..041a2bc845 100644 --- a/api/server/controllers/UserController.js +++ b/api/server/controllers/UserController.js @@ -266,11 +266,7 @@ const deleteUserController = async (req, res) => { await deleteUserPrompts(req, user.id); // delete user prompts await Action.deleteMany({ user: user.id }); // delete user actions await Token.deleteMany({ userId: user.id }); // delete user OAuth tokens - await Group.updateMany( - // remove user from all groups - { memberIds: user.id }, - { $pull: { memberIds: user.id } }, - ); + await Group.updateMany({ memberIds: user.id }, { $pullAll: { memberIds: [user.id] } }); await AclEntry.deleteMany({ principalId: user._id }); // delete user ACL entries logger.info(`User deleted account. 
Email: ${user.email} ID: ${user.id}`); res.status(200).send({ message: 'User deleted' }); diff --git a/api/server/controllers/UserController.spec.js b/api/server/controllers/UserController.spec.js new file mode 100644 index 0000000000..cf5d971e02 --- /dev/null +++ b/api/server/controllers/UserController.spec.js @@ -0,0 +1,208 @@ +const mongoose = require('mongoose'); +const { MongoMemoryServer } = require('mongodb-memory-server'); + +jest.mock('@librechat/data-schemas', () => { + const actual = jest.requireActual('@librechat/data-schemas'); + return { + ...actual, + logger: { + debug: jest.fn(), + error: jest.fn(), + warn: jest.fn(), + info: jest.fn(), + }, + }; +}); + +jest.mock('~/models', () => ({ + deleteAllUserSessions: jest.fn().mockResolvedValue(undefined), + deleteAllSharedLinks: jest.fn().mockResolvedValue(undefined), + updateUserPlugins: jest.fn(), + deleteUserById: jest.fn().mockResolvedValue(undefined), + deleteMessages: jest.fn().mockResolvedValue(undefined), + deletePresets: jest.fn().mockResolvedValue(undefined), + deleteUserKey: jest.fn().mockResolvedValue(undefined), + deleteConvos: jest.fn().mockResolvedValue(undefined), + deleteFiles: jest.fn().mockResolvedValue(undefined), + updateUser: jest.fn(), + findToken: jest.fn(), + getFiles: jest.fn().mockResolvedValue([]), +})); + +jest.mock('~/server/services/PluginService', () => ({ + updateUserPluginAuth: jest.fn(), + deleteUserPluginAuth: jest.fn().mockResolvedValue(undefined), +})); + +jest.mock('~/server/services/AuthService', () => ({ + verifyEmail: jest.fn(), + resendVerificationEmail: jest.fn(), +})); + +jest.mock('~/server/services/Files/S3/crud', () => ({ + needsRefresh: jest.fn(), + getNewS3URL: jest.fn(), +})); + +jest.mock('~/server/services/Files/process', () => ({ + processDeleteRequest: jest.fn().mockResolvedValue(undefined), +})); + +jest.mock('~/server/services/Config', () => ({ + getAppConfig: jest.fn().mockResolvedValue({}), + getMCPManager: jest.fn(), + getFlowStateManager: jest.fn(), + getMCPServersRegistry: jest.fn(), +})); + +jest.mock('~/models/ToolCall', () => ({ + deleteToolCalls: jest.fn().mockResolvedValue(undefined), +})); + +jest.mock('~/models/Prompt', () => ({ + deleteUserPrompts: jest.fn().mockResolvedValue(undefined), +})); + +jest.mock('~/models/Agent', () => ({ + deleteUserAgents: jest.fn().mockResolvedValue(undefined), +})); + +jest.mock('~/cache', () => ({ + getLogStores: jest.fn(), +})); + +let mongoServer; + +beforeAll(async () => { + mongoServer = await MongoMemoryServer.create(); + await mongoose.connect(mongoServer.getUri()); +}); + +afterAll(async () => { + await mongoose.disconnect(); + await mongoServer.stop(); +}); + +afterEach(async () => { + const collections = mongoose.connection.collections; + for (const key in collections) { + await collections[key].deleteMany({}); + } +}); + +const { deleteUserController } = require('./UserController'); +const { Group } = require('~/db/models'); +const { deleteConvos } = require('~/models'); + +describe('deleteUserController', () => { + const mockRes = { + status: jest.fn().mockReturnThis(), + send: jest.fn().mockReturnThis(), + json: jest.fn().mockReturnThis(), + }; + + beforeEach(() => { + jest.clearAllMocks(); + }); + + it('should return 200 on successful deletion', async () => { + const userId = new mongoose.Types.ObjectId(); + const req = { user: { id: userId.toString(), _id: userId, email: 'test@test.com' } }; + + await deleteUserController(req, mockRes); + + expect(mockRes.status).toHaveBeenCalledWith(200); + 
expect(mockRes.send).toHaveBeenCalledWith({ message: 'User deleted' }); + }); + + it('should remove the user from all groups via $pullAll', async () => { + const userId = new mongoose.Types.ObjectId(); + const userIdStr = userId.toString(); + const otherUser = new mongoose.Types.ObjectId().toString(); + + await Group.create([ + { name: 'Group A', memberIds: [userIdStr, otherUser], source: 'local' }, + { name: 'Group B', memberIds: [userIdStr], source: 'local' }, + { name: 'Group C', memberIds: [otherUser], source: 'local' }, + ]); + + const req = { user: { id: userIdStr, _id: userId, email: 'del@test.com' } }; + await deleteUserController(req, mockRes); + + const groups = await Group.find({}).sort({ name: 1 }).lean(); + expect(groups[0].memberIds).toEqual([otherUser]); + expect(groups[1].memberIds).toEqual([]); + expect(groups[2].memberIds).toEqual([otherUser]); + }); + + it('should handle user that exists in no groups', async () => { + const userId = new mongoose.Types.ObjectId(); + await Group.create({ name: 'Empty', memberIds: ['someone-else'], source: 'local' }); + + const req = { user: { id: userId.toString(), _id: userId, email: 'no-groups@test.com' } }; + await deleteUserController(req, mockRes); + + expect(mockRes.status).toHaveBeenCalledWith(200); + const group = await Group.findOne({ name: 'Empty' }).lean(); + expect(group.memberIds).toEqual(['someone-else']); + }); + + it('should remove duplicate memberIds if the user appears more than once', async () => { + const userId = new mongoose.Types.ObjectId(); + const userIdStr = userId.toString(); + + await Group.create({ + name: 'Dupes', + memberIds: [userIdStr, 'other', userIdStr], + source: 'local', + }); + + const req = { user: { id: userIdStr, _id: userId, email: 'dupe@test.com' } }; + await deleteUserController(req, mockRes); + + const group = await Group.findOne({ name: 'Dupes' }).lean(); + expect(group.memberIds).toEqual(['other']); + }); + + it('should still succeed when deleteConvos throws', async () => { + const userId = new mongoose.Types.ObjectId(); + deleteConvos.mockRejectedValueOnce(new Error('no convos')); + + const req = { user: { id: userId.toString(), _id: userId, email: 'convos@test.com' } }; + await deleteUserController(req, mockRes); + + expect(mockRes.status).toHaveBeenCalledWith(200); + expect(mockRes.send).toHaveBeenCalledWith({ message: 'User deleted' }); + }); + + it('should return 500 when a critical operation fails', async () => { + const userId = new mongoose.Types.ObjectId(); + const { deleteMessages } = require('~/models'); + deleteMessages.mockRejectedValueOnce(new Error('db down')); + + const req = { user: { id: userId.toString(), _id: userId, email: 'fail@test.com' } }; + await deleteUserController(req, mockRes); + + expect(mockRes.status).toHaveBeenCalledWith(500); + expect(mockRes.json).toHaveBeenCalledWith({ message: 'Something went wrong.' 
}); + }); + + it('should use string user.id (not ObjectId user._id) for memberIds removal', async () => { + const userId = new mongoose.Types.ObjectId(); + const userIdStr = userId.toString(); + const otherUser = 'other-user-id'; + + await Group.create({ + name: 'StringCheck', + memberIds: [userIdStr, otherUser], + source: 'local', + }); + + const req = { user: { id: userIdStr, _id: userId, email: 'stringcheck@test.com' } }; + await deleteUserController(req, mockRes); + + const group = await Group.findOne({ name: 'StringCheck' }).lean(); + expect(group.memberIds).toEqual([otherUser]); + expect(group.memberIds).not.toContain(userIdStr); + }); +}); diff --git a/api/server/controllers/agents/v1.spec.js b/api/server/controllers/agents/v1.spec.js index 8b2a57d903..e8eab369e8 100644 --- a/api/server/controllers/agents/v1.spec.js +++ b/api/server/controllers/agents/v1.spec.js @@ -557,7 +557,6 @@ describe('Agent Controllers - Mass Assignment Protection', () => { const updatedAgent = mockRes.json.mock.calls[0][0]; expect(updatedAgent).toBeDefined(); - // Note: updateAgentProjects requires more setup, so we just verify the handler doesn't crash }); test('should validate tool_resources in updates', async () => { diff --git a/api/server/services/PermissionService.js b/api/server/services/PermissionService.js index a843f48f6f..eb2fe493ed 100644 --- a/api/server/services/PermissionService.js +++ b/api/server/services/PermissionService.js @@ -536,7 +536,7 @@ const syncUserEntraGroupMemberships = async (user, accessToken, session = null) memberIds: user.idOnTheSource, idOnTheSource: { $nin: allGroupIds }, }, - { $pull: { memberIds: user.idOnTheSource } }, + { $pullAll: { memberIds: [user.idOnTheSource] } }, sessionOptions, ); } catch (error) { @@ -788,7 +788,15 @@ const bulkUpdateResourcePermissions = async ({ return results; } catch (error) { if (shouldEndSession && supportsTransactions) { - await localSession.abortTransaction(); + try { + await localSession.abortTransaction(); + } catch (transactionError) { + /** best-effort abort; may fail if commit already succeeded */ + logger.error( + `[PermissionService.bulkUpdateResourcePermissions] Error aborting transaction:`, + transactionError, + ); + } } logger.error(`[PermissionService.bulkUpdateResourcePermissions] Error: ${error.message}`); throw error; diff --git a/api/server/services/PermissionService.spec.js b/api/server/services/PermissionService.spec.js index b41780f345..477b0702b9 100644 --- a/api/server/services/PermissionService.spec.js +++ b/api/server/services/PermissionService.spec.js @@ -9,6 +9,7 @@ const { } = require('librechat-data-provider'); const { bulkUpdateResourcePermissions, + syncUserEntraGroupMemberships, getEffectivePermissions, findAccessibleResources, getAvailableRoles, @@ -26,7 +27,11 @@ jest.mock('@librechat/data-schemas', () => ({ // Mock GraphApiService to prevent config loading issues jest.mock('~/server/services/GraphApiService', () => ({ + entraIdPrincipalFeatureEnabled: jest.fn().mockReturnValue(false), + getUserOwnedEntraGroups: jest.fn().mockResolvedValue([]), + getUserEntraGroups: jest.fn().mockResolvedValue([]), getGroupMembers: jest.fn().mockResolvedValue([]), + getGroupOwners: jest.fn().mockResolvedValue([]), })); // Mock the logger @@ -1933,3 +1938,134 @@ describe('PermissionService', () => { }); }); }); + +describe('syncUserEntraGroupMemberships - $pullAll on Group.memberIds', () => { + const { + entraIdPrincipalFeatureEnabled, + getUserEntraGroups, + } = require('~/server/services/GraphApiService'); + const { 
Group } = require('~/db/models'); + + const userEntraId = 'entra-user-001'; + const user = { + openidId: 'openid-sub-001', + idOnTheSource: userEntraId, + provider: 'openid', + }; + + beforeEach(async () => { + await Group.deleteMany({}); + entraIdPrincipalFeatureEnabled.mockReturnValue(true); + }); + + afterEach(() => { + entraIdPrincipalFeatureEnabled.mockReturnValue(false); + getUserEntraGroups.mockResolvedValue([]); + }); + + it('should add user to matching Entra groups and remove from non-matching ones', async () => { + await Group.create([ + { name: 'Group A', source: 'entra', idOnTheSource: 'entra-group-a', memberIds: [] }, + { + name: 'Group B', + source: 'entra', + idOnTheSource: 'entra-group-b', + memberIds: [userEntraId], + }, + { + name: 'Group C', + source: 'entra', + idOnTheSource: 'entra-group-c', + memberIds: [userEntraId], + }, + ]); + + getUserEntraGroups.mockResolvedValue(['entra-group-a', 'entra-group-c']); + + await syncUserEntraGroupMemberships(user, 'fake-access-token'); + + const groups = await Group.find({ source: 'entra' }).sort({ name: 1 }).lean(); + expect(groups[0].memberIds).toContain(userEntraId); + expect(groups[1].memberIds).not.toContain(userEntraId); + expect(groups[2].memberIds).toContain(userEntraId); + }); + + it('should not modify groups when API returns empty list (early return)', async () => { + await Group.create([ + { + name: 'Group X', + source: 'entra', + idOnTheSource: 'entra-x', + memberIds: [userEntraId, 'other-user'], + }, + { name: 'Group Y', source: 'entra', idOnTheSource: 'entra-y', memberIds: [userEntraId] }, + ]); + + getUserEntraGroups.mockResolvedValue([]); + + await syncUserEntraGroupMemberships(user, 'fake-token'); + + const groups = await Group.find({ source: 'entra' }).sort({ name: 1 }).lean(); + expect(groups[0].memberIds).toContain(userEntraId); + expect(groups[0].memberIds).toContain('other-user'); + expect(groups[1].memberIds).toContain(userEntraId); + }); + + it('should remove user from groups not in the API response via $pullAll', async () => { + await Group.create([ + { name: 'Keep', source: 'entra', idOnTheSource: 'entra-keep', memberIds: [userEntraId] }, + { + name: 'Remove', + source: 'entra', + idOnTheSource: 'entra-remove', + memberIds: [userEntraId, 'other-user'], + }, + ]); + + getUserEntraGroups.mockResolvedValue(['entra-keep']); + + await syncUserEntraGroupMemberships(user, 'fake-token'); + + const keep = await Group.findOne({ idOnTheSource: 'entra-keep' }).lean(); + const remove = await Group.findOne({ idOnTheSource: 'entra-remove' }).lean(); + expect(keep.memberIds).toContain(userEntraId); + expect(remove.memberIds).not.toContain(userEntraId); + expect(remove.memberIds).toContain('other-user'); + }); + + it('should not modify local groups', async () => { + await Group.create([ + { name: 'Local Group', source: 'local', memberIds: [userEntraId] }, + { + name: 'Entra Group', + source: 'entra', + idOnTheSource: 'entra-only', + memberIds: [userEntraId], + }, + ]); + + getUserEntraGroups.mockResolvedValue([]); + + await syncUserEntraGroupMemberships(user, 'fake-token'); + + const localGroup = await Group.findOne({ source: 'local' }).lean(); + expect(localGroup.memberIds).toContain(userEntraId); + }); + + it('should early-return when feature is disabled', async () => { + entraIdPrincipalFeatureEnabled.mockReturnValue(false); + + await Group.create({ + name: 'Should Not Touch', + source: 'entra', + idOnTheSource: 'entra-safe', + memberIds: [userEntraId], + }); + + getUserEntraGroups.mockResolvedValue([]); + await 
syncUserEntraGroupMemberships(user, 'fake-token'); + + const group = await Group.findOne({ idOnTheSource: 'entra-safe' }).lean(); + expect(group.memberIds).toContain(userEntraId); + }); +}); diff --git a/config/delete-user.js b/config/delete-user.js index 5ad85577a4..66e325d1ee 100644 --- a/config/delete-user.js +++ b/config/delete-user.js @@ -107,7 +107,7 @@ async function gracefulExit(code = 0) { await Promise.all(tasks); // 6) Remove user from all groups - await Group.updateMany({ memberIds: user._id }, { $pull: { memberIds: user._id } }); + await Group.updateMany({ memberIds: uid }, { $pullAll: { memberIds: [uid] } }); // 7) Finally delete the user document itself await User.deleteOne({ _id: uid }); diff --git a/config/migrate-agent-permissions.js b/config/migrate-agent-permissions.js index b206c648ca..b511fba50f 100644 --- a/config/migrate-agent-permissions.js +++ b/config/migrate-agent-permissions.js @@ -10,7 +10,7 @@ const connect = require('./connect'); const { grantPermission } = require('~/server/services/PermissionService'); const { getProjectByName } = require('~/models/Project'); const { findRoleByIdentifier } = require('~/models'); -const { Agent } = require('~/db/models'); +const { Agent, AclEntry } = require('~/db/models'); async function migrateAgentPermissionsEnhanced({ dryRun = true, batchSize = 100 } = {}) { await connect(); @@ -39,48 +39,17 @@ async function migrateAgentPermissionsEnhanced({ dryRun = true, batchSize = 100 logger.info(`Found ${globalAgentIds.size} agents in global project`); - // Find agents without ACL entries using DocumentDB-compatible approach - const agentsToMigrate = await Agent.aggregate([ - { - $lookup: { - from: 'aclentries', - localField: '_id', - foreignField: 'resourceId', - as: 'aclEntries', - }, - }, - { - $addFields: { - userAclEntries: { - $filter: { - input: '$aclEntries', - as: 'aclEntry', - cond: { - $and: [ - { $eq: ['$$aclEntry.resourceType', ResourceType.AGENT] }, - { $eq: ['$$aclEntry.principalType', PrincipalType.USER] }, - ], - }, - }, - }, - }, - }, - { - $match: { - author: { $exists: true, $ne: null }, - userAclEntries: { $size: 0 }, - }, - }, - { - $project: { - _id: 1, - id: 1, - name: 1, - author: 1, - isCollaborative: 1, - }, - }, - ]); + const migratedAgentIds = await AclEntry.distinct('resourceId', { + resourceType: ResourceType.AGENT, + principalType: PrincipalType.USER, + }); + + const agentsToMigrate = await Agent.find({ + _id: { $nin: migratedAgentIds }, + author: { $exists: true, $ne: null }, + }) + .select('_id id name author isCollaborative') + .lean(); const categories = { globalEditAccess: [], // Global project + collaborative -> Public EDIT diff --git a/config/migrate-prompt-permissions.js b/config/migrate-prompt-permissions.js index 6018b16631..d86ee92f08 100644 --- a/config/migrate-prompt-permissions.js +++ b/config/migrate-prompt-permissions.js @@ -10,7 +10,7 @@ const connect = require('./connect'); const { grantPermission } = require('~/server/services/PermissionService'); const { getProjectByName } = require('~/models/Project'); const { findRoleByIdentifier } = require('~/models'); -const { PromptGroup } = require('~/db/models'); +const { PromptGroup, AclEntry } = require('~/db/models'); async function migrateToPromptGroupPermissions({ dryRun = true, batchSize = 100 } = {}) { await connect(); @@ -41,48 +41,17 @@ async function migrateToPromptGroupPermissions({ dryRun = true, batchSize = 100 logger.info(`Found ${globalPromptGroupIds.size} prompt groups in global project`); - // Find promptGroups without 
ACL entries - const promptGroupsToMigrate = await PromptGroup.aggregate([ - { - $lookup: { - from: 'aclentries', - localField: '_id', - foreignField: 'resourceId', - as: 'aclEntries', - }, - }, - { - $addFields: { - promptGroupAclEntries: { - $filter: { - input: '$aclEntries', - as: 'aclEntry', - cond: { - $and: [ - { $eq: ['$$aclEntry.resourceType', ResourceType.PROMPTGROUP] }, - { $eq: ['$$aclEntry.principalType', PrincipalType.USER] }, - ], - }, - }, - }, - }, - }, - { - $match: { - author: { $exists: true, $ne: null }, - promptGroupAclEntries: { $size: 0 }, - }, - }, - { - $project: { - _id: 1, - name: 1, - author: 1, - authorName: 1, - category: 1, - }, - }, - ]); + const migratedGroupIds = await AclEntry.distinct('resourceId', { + resourceType: ResourceType.PROMPTGROUP, + principalType: PrincipalType.USER, + }); + + const promptGroupsToMigrate = await PromptGroup.find({ + _id: { $nin: migratedGroupIds }, + author: { $exists: true, $ne: null }, + }) + .select('_id name author authorName category') + .lean(); const categories = { globalViewAccess: [], // PromptGroup in global project -> Public VIEW diff --git a/e2e/setup/cleanupUser.ts b/e2e/setup/cleanupUser.ts index 20ad661a5d..2e3de7d735 100644 --- a/e2e/setup/cleanupUser.ts +++ b/e2e/setup/cleanupUser.ts @@ -46,7 +46,8 @@ export default async function cleanupUser(user: TUser) { await Transaction.deleteMany({ user: userId }); await Token.deleteMany({ userId: userId }); await AclEntry.deleteMany({ principalId: userId }); - await Group.updateMany({ memberIds: userId }, { $pull: { memberIds: userId } }); + const userIdStr = userId.toString(); + await Group.updateMany({ memberIds: userIdStr }, { $pullAll: { memberIds: [userIdStr] } }); await User.deleteMany({ _id: userId }); console.log('πŸ€–: βœ… Deleted user from Database'); diff --git a/eslint.config.mjs b/eslint.config.mjs index 9990e0fc35..fc69450d1d 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -39,6 +39,7 @@ export default [ 'packages/data-provider/dist/**/*', 'packages/data-provider/test_bundle/**/*', 'packages/data-schemas/dist/**/*', + 'packages/data-schemas/misc/**/*', 'data-node/**/*', 'meili_data/**/*', '**/node_modules/**/*', diff --git a/packages/api/src/agents/migration.ts b/packages/api/src/agents/migration.ts index f8cad88b66..4da3852f82 100644 --- a/packages/api/src/agents/migration.ts +++ b/packages/api/src/agents/migration.ts @@ -24,7 +24,7 @@ export interface MigrationCheckParams { } interface AgentMigrationData { - _id: string; + _id: unknown; id: string; name: string; author: string; @@ -81,48 +81,18 @@ export async function checkAgentPermissionsMigration({ const globalProject = await methods.getProjectByName(GLOBAL_PROJECT_NAME, ['agentIds']); const globalAgentIds = new Set(globalProject?.agentIds || []); - // Find agents without ACL entries (no batching for efficiency on startup) - const agentsToMigrate: AgentMigrationData[] = await AgentModel.aggregate([ - { - $lookup: { - from: 'aclentries', - localField: '_id', - foreignField: 'resourceId', - as: 'aclEntries', - }, - }, - { - $addFields: { - userAclEntries: { - $filter: { - input: '$aclEntries', - as: 'aclEntry', - cond: { - $and: [ - { $eq: ['$$aclEntry.resourceType', ResourceType.AGENT] }, - { $eq: ['$$aclEntry.principalType', PrincipalType.USER] }, - ], - }, - }, - }, - }, - }, - { - $match: { - author: { $exists: true, $ne: null }, - userAclEntries: { $size: 0 }, - }, - }, - { - $project: { - _id: 1, - id: 1, - name: 1, - author: 1, - isCollaborative: 1, - }, - }, - ]); + const AclEntry = 
mongoose.model('AclEntry'); + const migratedAgentIds = await AclEntry.distinct('resourceId', { + resourceType: ResourceType.AGENT, + principalType: PrincipalType.USER, + }); + + const agentsToMigrate = (await AgentModel.find({ + _id: { $nin: migratedAgentIds }, + author: { $exists: true, $ne: null }, + }) + .select('_id id name author isCollaborative') + .lean()) as unknown as AgentMigrationData[]; const categories: { globalEditAccess: AgentMigrationData[]; diff --git a/packages/api/src/prompts/migration.ts b/packages/api/src/prompts/migration.ts index 40f0a585d7..a9e71d427a 100644 --- a/packages/api/src/prompts/migration.ts +++ b/packages/api/src/prompts/migration.ts @@ -24,7 +24,7 @@ export interface PromptMigrationCheckParams { } interface PromptGroupMigrationData { - _id: string; + _id: { toString(): string }; name: string; author: string; authorName?: string; @@ -81,48 +81,18 @@ export async function checkPromptPermissionsMigration({ (globalProject?.promptGroupIds || []).map((id) => id.toString()), ); - // Find promptGroups without ACL entries (no batching for efficiency on startup) - const promptGroupsToMigrate: PromptGroupMigrationData[] = await PromptGroupModel.aggregate([ - { - $lookup: { - from: 'aclentries', - localField: '_id', - foreignField: 'resourceId', - as: 'aclEntries', - }, - }, - { - $addFields: { - promptGroupAclEntries: { - $filter: { - input: '$aclEntries', - as: 'aclEntry', - cond: { - $and: [ - { $eq: ['$$aclEntry.resourceType', ResourceType.PROMPTGROUP] }, - { $eq: ['$$aclEntry.principalType', PrincipalType.USER] }, - ], - }, - }, - }, - }, - }, - { - $match: { - author: { $exists: true, $ne: null }, - promptGroupAclEntries: { $size: 0 }, - }, - }, - { - $project: { - _id: 1, - name: 1, - author: 1, - authorName: 1, - category: 1, - }, - }, - ]); + const AclEntry = mongoose.model('AclEntry'); + const migratedGroupIds = await AclEntry.distinct('resourceId', { + resourceType: ResourceType.PROMPTGROUP, + principalType: PrincipalType.USER, + }); + + const promptGroupsToMigrate = (await PromptGroupModel.find({ + _id: { $nin: migratedGroupIds }, + author: { $exists: true, $ne: null }, + }) + .select('_id name author authorName category') + .lean()) as unknown as PromptGroupMigrationData[]; const categories: { globalViewAccess: PromptGroupMigrationData[]; diff --git a/packages/data-schemas/jest.config.mjs b/packages/data-schemas/jest.config.mjs index b1fae43705..953848a3db 100644 --- a/packages/data-schemas/jest.config.mjs +++ b/packages/data-schemas/jest.config.mjs @@ -1,6 +1,7 @@ export default { collectCoverageFrom: ['src/**/*.{js,jsx,ts,tsx}', '!/node_modules/'], coveragePathIgnorePatterns: ['/node_modules/', '/dist/'], + testPathIgnorePatterns: ['/node_modules/', '/dist/', '/misc/'], coverageReporters: ['text', 'cobertura'], testResultsProcessor: 'jest-junit', moduleNameMapper: { diff --git a/packages/data-schemas/misc/ferretdb/aclBitops.ferretdb.spec.ts b/packages/data-schemas/misc/ferretdb/aclBitops.ferretdb.spec.ts new file mode 100644 index 0000000000..d8fb4ec84b --- /dev/null +++ b/packages/data-schemas/misc/ferretdb/aclBitops.ferretdb.spec.ts @@ -0,0 +1,468 @@ +import mongoose from 'mongoose'; +import { ResourceType, PrincipalType, PermissionBits } from 'librechat-data-provider'; +import type * as t from '~/types'; +import { createAclEntryMethods } from '~/methods/aclEntry'; +import aclEntrySchema from '~/schema/aclEntry'; + +/** + * Integration tests for $bit and $bitsAllSet on FerretDB. 
+ * + * Validates that modifyPermissionBits (using atomic $bit) + * and $bitsAllSet queries work identically on both MongoDB and FerretDB. + * + * Run against FerretDB: + * FERRETDB_URI="mongodb://ferretdb:ferretdb@127.0.0.1:27020/aclbit_test" npx jest aclBitops.ferretdb + * + * Run against MongoDB (for parity): + * FERRETDB_URI="mongodb://127.0.0.1:27017/aclbit_test" npx jest aclBitops.ferretdb + */ + +const FERRETDB_URI = process.env.FERRETDB_URI; +const describeIfFerretDB = FERRETDB_URI ? describe : describe.skip; + +describeIfFerretDB('ACL bitwise operations - FerretDB compatibility', () => { + let AclEntry: mongoose.Model; + let methods: ReturnType; + + const userId = new mongoose.Types.ObjectId(); + const groupId = new mongoose.Types.ObjectId(); + const grantedById = new mongoose.Types.ObjectId(); + + beforeAll(async () => { + await mongoose.connect(FERRETDB_URI as string); + AclEntry = mongoose.models.AclEntry || mongoose.model('AclEntry', aclEntrySchema); + methods = createAclEntryMethods(mongoose); + await AclEntry.createCollection(); + }); + + afterAll(async () => { + await mongoose.connection.dropDatabase(); + await mongoose.disconnect(); + }); + + afterEach(async () => { + await AclEntry.deleteMany({}); + }); + + describe('modifyPermissionBits (atomic $bit operator)', () => { + it('should add permission bits to existing entry', async () => { + const resourceId = new mongoose.Types.ObjectId(); + + await methods.grantPermission( + PrincipalType.USER, + userId, + ResourceType.AGENT, + resourceId, + PermissionBits.VIEW, + grantedById, + ); + + const updated = await methods.modifyPermissionBits( + PrincipalType.USER, + userId, + ResourceType.AGENT, + resourceId, + PermissionBits.EDIT, + null, + ); + + expect(updated).toBeDefined(); + expect(updated?.permBits).toBe(PermissionBits.VIEW | PermissionBits.EDIT); + }); + + it('should remove permission bits from existing entry', async () => { + const resourceId = new mongoose.Types.ObjectId(); + + await methods.grantPermission( + PrincipalType.USER, + userId, + ResourceType.AGENT, + resourceId, + PermissionBits.VIEW | PermissionBits.EDIT | PermissionBits.DELETE, + grantedById, + ); + + const updated = await methods.modifyPermissionBits( + PrincipalType.USER, + userId, + ResourceType.AGENT, + resourceId, + null, + PermissionBits.EDIT, + ); + + expect(updated).toBeDefined(); + expect(updated?.permBits).toBe(PermissionBits.VIEW | PermissionBits.DELETE); + }); + + it('should add and remove bits in one operation', async () => { + const resourceId = new mongoose.Types.ObjectId(); + + await methods.grantPermission( + PrincipalType.USER, + userId, + ResourceType.AGENT, + resourceId, + PermissionBits.VIEW, + grantedById, + ); + + const updated = await methods.modifyPermissionBits( + PrincipalType.USER, + userId, + ResourceType.AGENT, + resourceId, + PermissionBits.EDIT | PermissionBits.DELETE, + PermissionBits.VIEW, + ); + + expect(updated).toBeDefined(); + expect(updated?.permBits).toBe(PermissionBits.EDIT | PermissionBits.DELETE); + }); + + it('should handle adding bits that are already set (idempotent OR)', async () => { + const resourceId = new mongoose.Types.ObjectId(); + + await methods.grantPermission( + PrincipalType.USER, + userId, + ResourceType.AGENT, + resourceId, + PermissionBits.VIEW | PermissionBits.EDIT, + grantedById, + ); + + const updated = await methods.modifyPermissionBits( + PrincipalType.USER, + userId, + ResourceType.AGENT, + resourceId, + PermissionBits.VIEW, + null, + ); + + 
expect(updated?.permBits).toBe(PermissionBits.VIEW | PermissionBits.EDIT); + }); + + it('should handle removing bits that are not set (no-op AND)', async () => { + const resourceId = new mongoose.Types.ObjectId(); + + await methods.grantPermission( + PrincipalType.USER, + userId, + ResourceType.AGENT, + resourceId, + PermissionBits.VIEW, + grantedById, + ); + + const updated = await methods.modifyPermissionBits( + PrincipalType.USER, + userId, + ResourceType.AGENT, + resourceId, + null, + PermissionBits.DELETE, + ); + + expect(updated?.permBits).toBe(PermissionBits.VIEW); + }); + + it('should handle all four permission bits', async () => { + const resourceId = new mongoose.Types.ObjectId(); + const allBits = + PermissionBits.VIEW | PermissionBits.EDIT | PermissionBits.DELETE | PermissionBits.SHARE; + + await methods.grantPermission( + PrincipalType.USER, + userId, + ResourceType.AGENT, + resourceId, + allBits, + grantedById, + ); + + const afterRemove = await methods.modifyPermissionBits( + PrincipalType.USER, + userId, + ResourceType.AGENT, + resourceId, + null, + PermissionBits.EDIT | PermissionBits.SHARE, + ); + + expect(afterRemove?.permBits).toBe(PermissionBits.VIEW | PermissionBits.DELETE); + }); + + it('should work with group principals', async () => { + const resourceId = new mongoose.Types.ObjectId(); + + await methods.grantPermission( + PrincipalType.GROUP, + groupId, + ResourceType.AGENT, + resourceId, + PermissionBits.VIEW, + grantedById, + ); + + const updated = await methods.modifyPermissionBits( + PrincipalType.GROUP, + groupId, + ResourceType.AGENT, + resourceId, + PermissionBits.EDIT, + null, + ); + + expect(updated?.permBits).toBe(PermissionBits.VIEW | PermissionBits.EDIT); + }); + + it('should work with public principals', async () => { + const resourceId = new mongoose.Types.ObjectId(); + + await methods.grantPermission( + PrincipalType.PUBLIC, + null, + ResourceType.AGENT, + resourceId, + PermissionBits.VIEW | PermissionBits.EDIT, + grantedById, + ); + + const updated = await methods.modifyPermissionBits( + PrincipalType.PUBLIC, + null, + ResourceType.AGENT, + resourceId, + null, + PermissionBits.EDIT, + ); + + expect(updated?.permBits).toBe(PermissionBits.VIEW); + }); + + it('should return null when entry does not exist', async () => { + const nonexistentResource = new mongoose.Types.ObjectId(); + + const result = await methods.modifyPermissionBits( + PrincipalType.USER, + userId, + ResourceType.AGENT, + nonexistentResource, + PermissionBits.EDIT, + null, + ); + + expect(result).toBeNull(); + }); + + it('should clear all bits via remove', async () => { + const resourceId = new mongoose.Types.ObjectId(); + + await methods.grantPermission( + PrincipalType.USER, + userId, + ResourceType.AGENT, + resourceId, + PermissionBits.VIEW | PermissionBits.EDIT, + grantedById, + ); + + const updated = await methods.modifyPermissionBits( + PrincipalType.USER, + userId, + ResourceType.AGENT, + resourceId, + null, + PermissionBits.VIEW | PermissionBits.EDIT, + ); + + expect(updated?.permBits).toBe(0); + }); + }); + + describe('$bitsAllSet queries (hasPermission + findAccessibleResources)', () => { + it('should find entries with specific bits set via hasPermission', async () => { + const resourceId = new mongoose.Types.ObjectId(); + + await methods.grantPermission( + PrincipalType.USER, + userId, + ResourceType.AGENT, + resourceId, + PermissionBits.VIEW | PermissionBits.EDIT, + grantedById, + ); + + const principals = [{ principalType: PrincipalType.USER, principalId: userId }]; + + 
expect( + await methods.hasPermission( + principals, + ResourceType.AGENT, + resourceId, + PermissionBits.VIEW, + ), + ).toBe(true); + expect( + await methods.hasPermission( + principals, + ResourceType.AGENT, + resourceId, + PermissionBits.EDIT, + ), + ).toBe(true); + expect( + await methods.hasPermission( + principals, + ResourceType.AGENT, + resourceId, + PermissionBits.DELETE, + ), + ).toBe(false); + }); + + it('should find accessible resources filtered by permission bit', async () => { + const res1 = new mongoose.Types.ObjectId(); + const res2 = new mongoose.Types.ObjectId(); + const res3 = new mongoose.Types.ObjectId(); + + await methods.grantPermission( + PrincipalType.USER, + userId, + ResourceType.AGENT, + res1, + PermissionBits.VIEW, + grantedById, + ); + await methods.grantPermission( + PrincipalType.USER, + userId, + ResourceType.AGENT, + res2, + PermissionBits.VIEW | PermissionBits.EDIT, + grantedById, + ); + await methods.grantPermission( + PrincipalType.USER, + userId, + ResourceType.AGENT, + res3, + PermissionBits.EDIT, + grantedById, + ); + + const principals = [{ principalType: PrincipalType.USER, principalId: userId }]; + + const viewable = await methods.findAccessibleResources( + principals, + ResourceType.AGENT, + PermissionBits.VIEW, + ); + expect(viewable.map((r) => r.toString()).sort()).toEqual( + [res1.toString(), res2.toString()].sort(), + ); + + const editable = await methods.findAccessibleResources( + principals, + ResourceType.AGENT, + PermissionBits.EDIT, + ); + expect(editable.map((r) => r.toString()).sort()).toEqual( + [res2.toString(), res3.toString()].sort(), + ); + }); + + it('should correctly query after modifyPermissionBits changes', async () => { + const resourceId = new mongoose.Types.ObjectId(); + const principals = [{ principalType: PrincipalType.USER, principalId: userId }]; + + await methods.grantPermission( + PrincipalType.USER, + userId, + ResourceType.AGENT, + resourceId, + PermissionBits.VIEW, + grantedById, + ); + + expect( + await methods.hasPermission( + principals, + ResourceType.AGENT, + resourceId, + PermissionBits.VIEW, + ), + ).toBe(true); + expect( + await methods.hasPermission( + principals, + ResourceType.AGENT, + resourceId, + PermissionBits.EDIT, + ), + ).toBe(false); + + await methods.modifyPermissionBits( + PrincipalType.USER, + userId, + ResourceType.AGENT, + resourceId, + PermissionBits.EDIT, + PermissionBits.VIEW, + ); + + expect( + await methods.hasPermission( + principals, + ResourceType.AGENT, + resourceId, + PermissionBits.VIEW, + ), + ).toBe(false); + expect( + await methods.hasPermission( + principals, + ResourceType.AGENT, + resourceId, + PermissionBits.EDIT, + ), + ).toBe(true); + }); + + it('should combine effective permissions across user and group', async () => { + const resourceId = new mongoose.Types.ObjectId(); + + await methods.grantPermission( + PrincipalType.USER, + userId, + ResourceType.AGENT, + resourceId, + PermissionBits.VIEW, + grantedById, + ); + await methods.grantPermission( + PrincipalType.GROUP, + groupId, + ResourceType.AGENT, + resourceId, + PermissionBits.EDIT, + grantedById, + ); + + const principals = [ + { principalType: PrincipalType.USER, principalId: userId }, + { principalType: PrincipalType.GROUP, principalId: groupId }, + ]; + + const effective = await methods.getEffectivePermissions( + principals, + ResourceType.AGENT, + resourceId, + ); + + expect(effective).toBe(PermissionBits.VIEW | PermissionBits.EDIT); + }); + }); +}); diff --git 
a/packages/data-schemas/misc/ferretdb/docker-compose.ferretdb.yml b/packages/data-schemas/misc/ferretdb/docker-compose.ferretdb.yml
new file mode 100644
index 0000000000..83b6ae7ced
--- /dev/null
+++ b/packages/data-schemas/misc/ferretdb/docker-compose.ferretdb.yml
@@ -0,0 +1,21 @@
+services:
+  ferretdb-postgres:
+    image: ghcr.io/ferretdb/postgres-documentdb:17-0.107.0-ferretdb-2.7.0
+    restart: on-failure
+    environment:
+      - POSTGRES_USER=ferretdb
+      - POSTGRES_PASSWORD=ferretdb
+      - POSTGRES_DB=postgres
+    volumes:
+      - ferretdb_data:/var/lib/postgresql/data
+
+  ferretdb:
+    image: ghcr.io/ferretdb/ferretdb:2.7.0
+    restart: on-failure
+    ports:
+      - "27020:27017"
+    environment:
+      - FERRETDB_POSTGRESQL_URL=postgres://ferretdb:ferretdb@ferretdb-postgres:5432/postgres
+
+volumes:
+  ferretdb_data:
diff --git a/packages/data-schemas/misc/ferretdb/ferretdb-multitenancy-plan.md b/packages/data-schemas/misc/ferretdb/ferretdb-multitenancy-plan.md
new file mode 100644
index 0000000000..5e2569d087
--- /dev/null
+++ b/packages/data-schemas/misc/ferretdb/ferretdb-multitenancy-plan.md
@@ -0,0 +1,204 @@
+# FerretDB Multi-Tenancy Plan
+
+## Status: Active Investigation
+
+## Goal
+
+Database-per-org data isolation using FerretDB (PostgreSQL-backed) with horizontal sharding across multiple FerretDB+Postgres pairs. MongoDB and AWS DocumentDB are not options.
+
+---
+
+## Findings
+
+### 1. FerretDB Architecture (DocumentDB Backend)
+
+FerretDB with `postgres-documentdb` does **not** create separate PostgreSQL schemas per MongoDB database. All data lives in a single `documentdb_data` PG schema:
+
+- Each MongoDB collection → `documents_` + `retry_` table pair
+- Catalog tracked in `documentdb_api_catalog.collections` and `.collection_indexes`
+- `mongoose.connection.useDb('org_X')` creates a logical database in DocumentDB's catalog
+
+**Implication**: No PG-level schema isolation, but logical isolation is enforced by FerretDB's wire protocol layer. Backup/restore must go through FerretDB, not raw `pg_dump`.
+
+### 2. Schema & Index Compatibility
+
+All 29 LibreChat Mongoose models and 98 custom indexes work on FerretDB v2.7.0:
+
+| Index Type | Count | Status |
+|---|---|---|
+| Sparse + unique | 9 (User OAuth IDs) | Working |
+| TTL (expireAfterSeconds) | 8 models | Working |
+| partialFilterExpression | 2 (File, Group) | Working |
+| Compound unique | 5+ | Working |
+| Concurrent creation | All 29 models | No deadlock (single org) |
+
+### 3. Scaling Curve (Empirically Tested)
+
+| Orgs | Collections | Catalog Indexes | Data Tables | pg_class | Init/org | Query avg | Query p95 |
+|------|-------------|-----------------|-------------|----------|----------|-----------|-----------|
+| 10 | 450 | 1,920 | 900 | 5,975 | 501ms | 1.03ms | 1.44ms |
+| 50 | 1,650 | 7,040 | 3,300 | 20,695 | 485ms | 1.00ms | 1.46ms |
+| 100 | 3,150 | 13,440 | 6,300 | 39,095 | 483ms | 0.83ms | 1.13ms |
+
+**Key finding**: Init time and query latency are flat through 100 orgs. No degradation.
+
+### 4. Write Amplification
+
+User model (11+ indexes) vs zero-index collection: **1.11x** — only 11% overhead. DocumentDB's JSONB index management is efficient.
+
+### 5. Sharding PoC
+
+Tenant router proven with (a sketch follows the list):
+- Pool assignment with capacity limits (fill-then-spill)
+- Warm cache routing overhead: **0.001ms** (sub-microsecond)
+- Cold routing (DB lookup + connection + model registration): **6ms**
+- Cross-pool data isolation confirmed
+- Express middleware pattern (`req.getModel('User')`) works transparently
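+
+A minimal sketch of the router shape this PoC validates (illustrative only: pool URIs, the `x-org-id` header, and helper names are assumptions, not the PoC's exact API):
+
+```ts
+import mongoose from 'mongoose';
+import type { NextFunction, Request, Response } from 'express';
+
+const POOLS = [
+  { uri: 'mongodb://ferretdb-a:27017/librechat', capacity: 500, assigned: 0 },
+  { uri: 'mongodb://ferretdb-b:27017/librechat', capacity: 500, assigned: 0 },
+];
+
+const poolByOrg = new Map<string, (typeof POOLS)[number]>(); // persisted in a real assignment table
+const baseConns = new Map<string, Promise<mongoose.Connection>>(); // one base connection per pool
+const orgConns = new Map<string, mongoose.Connection>(); // warm cache: routing becomes a Map lookup
+
+/** Fill-then-spill: keep assigning orgs to the first pool with spare capacity. */
+function assignPool(orgId: string) {
+  let pool = poolByOrg.get(orgId);
+  if (!pool) {
+    pool = POOLS.find((p) => p.assigned < p.capacity) ?? POOLS[POOLS.length - 1];
+    pool.assigned += 1;
+    poolByOrg.set(orgId, pool);
+  }
+  return pool;
+}
+
+async function getOrgConnection(orgId: string): Promise<mongoose.Connection> {
+  const warm = orgConns.get(orgId);
+  if (warm) {
+    return warm; // warm path: ~0.001ms in the PoC
+  }
+  const pool = assignPool(orgId);
+  if (!baseConns.has(pool.uri)) {
+    baseConns.set(pool.uri, mongoose.createConnection(pool.uri).asPromise());
+  }
+  const base = await baseConns.get(pool.uri)!;
+  const conn = base.useDb(`org_${orgId}`, { useCache: true }); // cold path: ~6ms in the PoC (lookup + connection + model registration)
+  orgConns.set(orgId, conn);
+  return conn;
+}
+
+/** Express middleware exposing per-tenant models, mirroring the PoC's req.getModel('User') pattern. */
+export function tenantRouter() {
+  return async (
+    req: Request & { getModel?: (name: string) => mongoose.Model<unknown> },
+    _res: Response,
+    next: NextFunction,
+  ) => {
+    const conn = await getOrgConnection(String(req.headers['x-org-id'] ?? 'default'));
+    req.getModel = (name) => conn.model(name); // assumes the 29 schemas were registered on this connection
+    next();
+  };
+}
+```
+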
+### 6. Scaling Thresholds
+
+| Org Count | Postgres Instances | Notes |
+|-----------|-------------------|-------|
+| 1–300 | 1 | Default config |
+| 300–700 | 1 | Tune autovacuum, PgBouncer, shared_buffers |
+| 700–1,000 | 1-2 | Split when monitoring signals pressure |
+| 1,000+ | N / ~500 each | One FerretDB+Postgres pair per ~500 orgs |
+
+### 7. Deadlock Behavior
+
+- **Single org, concurrent index creation**: No deadlock (DocumentDB handles it)
+- **Bulk provisioning (10 orgs sequential)**: Deadlock occurred on Pool B, recovered via retry
+- **Production requirement**: Exponential backoff + jitter retry on `createIndexes()`
+
+---
+
+## Open Items
+
+### A. Production Deadlock Retry ✅
+- [x] Build `retryWithBackoff` utility with exponential backoff + jitter
+- [x] Integrate into `initializeOrgCollections` and `migrateOrg` scripts
+- [x] Tested against FerretDB — real deadlocks detected and recovered:
+  - `retry_4` hit a deadlock on `createIndexes(User)`, recovered via backoff (1,839ms total)
+  - `retry_5` also hit the retry path (994ms vs ~170ms clean)
+  - Production utility at `packages/data-schemas/src/utils/retry.ts`
+
+### B. Per-Org Backup/Restore ✅
+- [x] `mongodump`/`mongorestore` CLI not available — tested programmatic driver-level approach
+- [x] **Backup**: `listCollections()` → `find({}).toArray()` per collection → in-memory `OrgBackup` struct
+- [x] **Restore**: `collection.insertMany(docs)` per collection into fresh org database
+- [x] **BSON type preservation verified**: ObjectId, Date, String all round-trip correctly
+- [x] **Data integrity verified**: `_id` values, field values, document counts match exactly
+- [x] **Performance**: Backup 24ms, Restore 15ms (8 docs across 29 collections)
+- [x] Scales linearly with document count — no per-collection overhead beyond the query
+
+### C. Schema Migration Across Orgs ✅
+- [x] `createIndexes()` is idempotent — re-init took 86ms with 12 indexes unchanged
+- [x] **New collection propagation**: Added `AuditLog` collection with 4 indexes to 5 orgs — 109ms total
+- [x] **New index propagation**: Added compound `{username:1, createdAt:-1}` index to `users` across 5 orgs — 22ms total
+- [x] **Full migration run**: 5 orgs × 29 models = 88ms/org average (with deadlock retry)
+- [x] **Data preservation confirmed**: All existing user data intact after migration
+- [x] Extrapolating: 1,000 orgs × 88ms/org = ~88 seconds for a full migration sweep
+
+---
+
+## Test Files
+
+| File | Purpose |
+|---|---|
+| `packages/data-schemas/misc/ferretdb/multiTenancy.ferretdb.spec.ts` | 5-phase benchmark (useDb mapping, indexes, scaling, write amp, shared collection) |
+| `packages/data-schemas/misc/ferretdb/sharding.ferretdb.spec.ts` | Sharding PoC (router, assignment, isolation, middleware pattern) |
+| `packages/data-schemas/misc/ferretdb/orgOperations.ferretdb.spec.ts` | Production operations (backup/restore, migration, deadlock retry) |
+| `packages/data-schemas/src/utils/retry.ts` | Production retry utility |
+
+## Docker
+
+| File | Purpose |
+|---|---|
+| `docker-compose.ferretdb.yml` | Single FerretDB + Postgres (dev/test) |
+
+---
+
+## Detailed Empirical Results
+
+### Deadlock Retry Behavior
+
+The `retryWithBackoff` utility was exercised under real FerretDB load; a usage sketch follows.
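+
+A minimal sketch of the intended call pattern (mirroring the calls exercised in `orgOperations.ferretdb.spec.ts`; `registerModels`, the org naming, and the index spec are illustrative):
+
+```ts
+import mongoose from 'mongoose';
+import { initializeOrgCollections, retryWithBackoff } from '~/utils/retry';
+
+// Assumed helper: registers the 29 LibreChat schemas on the given connection (see the spec files).
+declare function registerModels(conn: mongoose.Connection): Record<string, mongoose.Model<unknown>>;
+
+export async function provisionOrg(baseConn: mongoose.Connection, orgId: string) {
+  const conn = baseConn.useDb(`org_${orgId}`, { useCache: true });
+  const models = registerModels(conn);
+
+  // Bulk path: create every collection and build its indexes, retrying deadlocked builds.
+  await initializeOrgCollections(models, { maxAttempts: 5, baseDelayMs: 50 });
+
+  // Targeted path: wrap a single index build in the same exponential backoff + jitter.
+  await retryWithBackoff(
+    () => conn.db!.collection('users').createIndex({ username: 1, createdAt: -1 }),
+    `createIndex(users) for ${orgId}`,
+    { maxAttempts: 5, baseDelayMs: 100, maxDelayMs: 10_000, jitter: true },
+  );
+}
+```
+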
+Key observations:
+
+| Scenario | Attempts | Total Time | Notes |
+|---|---|---|---|
+| Clean org init (no contention) | 1 | 165-199ms | Most orgs complete in one shot |
+| Deadlock on User indexes | 2 | 994ms | Single retry recovers cleanly |
+| Deadlock with compounding retries | 2-3 | 1,839ms | Worst case in 5-org sequential batch |
+
+The `User` model (11+ indexes including 9 sparse unique) is the most deadlock-prone collection. The retry utility's exponential backoff with jitter (100ms base, 10s cap) handles this gracefully.
+
+### Backup/Restore Round-Trip
+
+Tested with a realistic org containing 4 populated collections:
+
+| Operation | Time | Details |
+|---|---|---|
+| Backup (full org) | 24ms | 8 docs across 29 collections (25 empty) |
+| Restore (to new org) | 15ms | Including `insertMany()` for each collection |
+| Index re-creation | ~500ms | Separate `initializeOrgCollections` call |
+
+Round-trip verified:
+- `_id` (ObjectId) preserved exactly
+- `createdAt` / `updatedAt` (Date) preserved
+- String, Number, ObjectId ref fields preserved
+- Document counts match source
+
+For larger orgs (thousands of messages/conversations), backup time scales linearly with document count. The bottleneck is network I/O to FerretDB, not serialization.
+
+### Schema Migration Performance
+
+| Operation | Time | Per Org |
+|---|---|---|
+| Idempotent re-init (no changes) | 86ms | 86ms |
+| New collection + 4 indexes | 109ms | 22ms/org |
+| New compound index on users | 22ms | 4.4ms/org |
+| Full migration sweep (29 models) | 439ms | 88ms/org |
+
+Migration is safe to run while the app is serving traffic — `createIndexes` and `createCollection` are non-blocking operations that don't lock existing data.
+
+### 5-Org Provisioning with Production Retry
+
+```
+retry_1: 193ms (29 models) — clean
+retry_2: 199ms (29 models) — clean
+retry_3: 165ms (29 models) — clean
+retry_4: 1839ms (29 models) — deadlock on User indexes, recovered
+retry_5: 994ms (29 models) — deadlock on User indexes, recovered
+Total: 3,390ms for 5 orgs (678ms avg, but 165ms median)
+```
+
+---
+
+## Production Recommendations
+
+### 1. Org Provisioning
+
+Use `initializeOrgCollections()` from `packages/data-schemas/src/utils/retry.ts` for all new org setup. Process orgs in batches of 10 with `Promise.all()` to parallelize across pools while minimizing per-pool contention.
+
+### 2. Backup Strategy
+
+Implement driver-level backup (not `mongodump`):
+- Enumerate collections via `listCollections()`
+- Stream documents via `find({}).batchSize(1000)` for large collections
+- Write to object storage (S3/GCS) as NDJSON per collection
+- Restore via `insertMany()` in batches of 1,000
+
+### 3. Schema Migrations
+
+Run `migrateAllOrgs()` as a deployment step:
+- Enumerate all org databases from the assignment table
+- For each org: register models, `createCollection()`, `createIndexesWithRetry()`
+- `createIndexes()` is idempotent — safe to re-run
+- At 88ms/org, 1,000 orgs complete in ~90 seconds
+
+### 4. Monitoring
+
+Track per-org provisioning and migration times.
If the median provisioning time rises above 500ms/org, investigate PostgreSQL catalog pressure: +- `pg_stat_user_tables.n_dead_tup` for autovacuum health +- `pg_stat_bgwriter.buffers_backend` for buffer pressure +- `documentdb_api_catalog.collections` count for total table count diff --git a/packages/data-schemas/misc/ferretdb/jest.ferretdb.config.mjs b/packages/data-schemas/misc/ferretdb/jest.ferretdb.config.mjs new file mode 100644 index 0000000000..b5477be737 --- /dev/null +++ b/packages/data-schemas/misc/ferretdb/jest.ferretdb.config.mjs @@ -0,0 +1,18 @@ +/** + * Jest config for FerretDB integration tests. + * These tests require a running FerretDB instance and are NOT run in CI. + * + * Usage: + * FERRETDB_URI="mongodb://ferretdb:ferretdb@127.0.0.1:27020/test_db" \ + * npx jest --config misc/ferretdb/jest.ferretdb.config.mjs --testTimeout=300000 [pattern] + */ +export default { + rootDir: '../..', + testMatch: ['/misc/ferretdb/**/*.ferretdb.spec.ts'], + moduleNameMapper: { + '^@src/(.*)$': '/src/$1', + '^~/(.*)$': '/src/$1', + }, + restoreMocks: true, + testTimeout: 300000, +}; diff --git a/packages/data-schemas/misc/ferretdb/migrationAntiJoin.ferretdb.spec.ts b/packages/data-schemas/misc/ferretdb/migrationAntiJoin.ferretdb.spec.ts new file mode 100644 index 0000000000..f2561137b7 --- /dev/null +++ b/packages/data-schemas/misc/ferretdb/migrationAntiJoin.ferretdb.spec.ts @@ -0,0 +1,362 @@ +import mongoose, { Schema, Types } from 'mongoose'; + +/** + * Integration tests for migration anti-join β†’ $nin replacement. + * + * The original migration scripts used a $lookup + $filter + $match({ $size: 0 }) + * anti-join to find resources without ACL entries. FerretDB does not support + * $lookup, so this was replaced with a two-step pattern: + * 1. AclEntry.distinct('resourceId', { resourceType, principalType }) + * 2. Model.find({ _id: { $nin: migratedIds }, ... }) + * + * Run against FerretDB: + * FERRETDB_URI="mongodb://ferretdb:ferretdb@127.0.0.1:27020/migration_antijoin_test" npx jest migrationAntiJoin.ferretdb + * + * Run against MongoDB (for parity): + * FERRETDB_URI="mongodb://127.0.0.1:27017/migration_antijoin_test" npx jest migrationAntiJoin.ferretdb + */ + +const FERRETDB_URI = process.env.FERRETDB_URI; + +const describeIfFerretDB = FERRETDB_URI ? 
describe : describe.skip; + +const agentSchema = new Schema({ + id: { type: String, required: true }, + name: { type: String, required: true }, + author: { type: String }, + isCollaborative: { type: Boolean, default: false }, +}); + +const promptGroupSchema = new Schema({ + name: { type: String, required: true }, + author: { type: String }, + authorName: { type: String }, + category: { type: String }, +}); + +const aclEntrySchema = new Schema( + { + principalType: { type: String, required: true }, + principalId: { type: Schema.Types.Mixed }, + resourceType: { type: String, required: true }, + resourceId: { type: Schema.Types.ObjectId, required: true }, + permBits: { type: Number, default: 1 }, + roleId: { type: Schema.Types.ObjectId }, + grantedBy: { type: Schema.Types.ObjectId }, + grantedAt: { type: Date, default: Date.now }, + }, + { timestamps: true }, +); + +type AgentDoc = mongoose.InferSchemaType; +type PromptGroupDoc = mongoose.InferSchemaType; +type AclEntryDoc = mongoose.InferSchemaType; + +describeIfFerretDB('Migration anti-join β†’ $nin - FerretDB compatibility', () => { + let Agent: mongoose.Model; + let PromptGroup: mongoose.Model; + let AclEntry: mongoose.Model; + + beforeAll(async () => { + await mongoose.connect(FERRETDB_URI as string); + Agent = mongoose.model('TestMigAgent', agentSchema); + PromptGroup = mongoose.model('TestMigPromptGroup', promptGroupSchema); + AclEntry = mongoose.model('TestMigAclEntry', aclEntrySchema); + }); + + afterAll(async () => { + await mongoose.connection.db?.dropDatabase(); + await mongoose.disconnect(); + }); + + beforeEach(async () => { + await Agent.deleteMany({}); + await PromptGroup.deleteMany({}); + await AclEntry.deleteMany({}); + }); + + describe('agent migration pattern', () => { + it('should return only agents WITHOUT user-type ACL entries', async () => { + const agent1 = await Agent.create({ id: 'agent_1', name: 'Migrated Agent', author: 'user1' }); + const agent2 = await Agent.create({ + id: 'agent_2', + name: 'Unmigrated Agent', + author: 'user2', + }); + await Agent.create({ id: 'agent_3', name: 'Another Unmigrated', author: 'user3' }); + + await AclEntry.create({ + principalType: 'user', + principalId: new Types.ObjectId(), + resourceType: 'agent', + resourceId: agent1._id, + }); + + await AclEntry.create({ + principalType: 'public', + resourceType: 'agent', + resourceId: agent2._id, + }); + + const migratedIds = await AclEntry.distinct('resourceId', { + resourceType: 'agent', + principalType: 'user', + }); + + const toMigrate = await Agent.find({ + _id: { $nin: migratedIds }, + author: { $exists: true, $ne: null }, + }) + .select('_id id name author isCollaborative') + .lean(); + + expect(toMigrate).toHaveLength(2); + const names = toMigrate.map((a: Record) => a.name).sort(); + expect(names).toEqual(['Another Unmigrated', 'Unmigrated Agent']); + }); + + it('should exclude agents without an author', async () => { + await Agent.create({ id: 'agent_no_author', name: 'No Author' }); + await Agent.create({ id: 'agent_null_author', name: 'Null Author', author: null }); + await Agent.create({ id: 'agent_with_author', name: 'Has Author', author: 'user1' }); + + const migratedIds = await AclEntry.distinct('resourceId', { + resourceType: 'agent', + principalType: 'user', + }); + + const toMigrate = await Agent.find({ + _id: { $nin: migratedIds }, + author: { $exists: true, $ne: null }, + }) + .select('_id id name author') + .lean(); + + expect(toMigrate).toHaveLength(1); + expect((toMigrate[0] as Record).name).toBe('Has Author'); + }); 
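+
+    // Note on the $nin replacement pattern: if no ACL entries exist yet, distinct()
+    // returns [] and a { $nin: [] } filter matches every document (standard MongoDB
+    // behavior), so a fresh database still selects all authored resources for migration.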
+ + it('should return empty array when all agents are migrated', async () => { + const agent1 = await Agent.create({ id: 'a1', name: 'Agent 1', author: 'user1' }); + const agent2 = await Agent.create({ id: 'a2', name: 'Agent 2', author: 'user2' }); + + await AclEntry.create([ + { + principalType: 'user', + principalId: new Types.ObjectId(), + resourceType: 'agent', + resourceId: agent1._id, + }, + { + principalType: 'user', + principalId: new Types.ObjectId(), + resourceType: 'agent', + resourceId: agent2._id, + }, + ]); + + const migratedIds = await AclEntry.distinct('resourceId', { + resourceType: 'agent', + principalType: 'user', + }); + + const toMigrate = await Agent.find({ + _id: { $nin: migratedIds }, + author: { $exists: true, $ne: null }, + }).lean(); + + expect(toMigrate).toHaveLength(0); + }); + + it('should not be confused by ACL entries for a different resourceType', async () => { + const agent = await Agent.create({ id: 'a1', name: 'Agent', author: 'user1' }); + + await AclEntry.create({ + principalType: 'user', + principalId: new Types.ObjectId(), + resourceType: 'promptGroup', + resourceId: agent._id, + }); + + const migratedIds = await AclEntry.distinct('resourceId', { + resourceType: 'agent', + principalType: 'user', + }); + + const toMigrate = await Agent.find({ + _id: { $nin: migratedIds }, + author: { $exists: true, $ne: null }, + }).lean(); + + expect(toMigrate).toHaveLength(1); + expect((toMigrate[0] as Record).name).toBe('Agent'); + }); + + it('should return correct projected fields', async () => { + await Agent.create({ + id: 'proj_agent', + name: 'Field Test', + author: 'user1', + isCollaborative: true, + }); + + const migratedIds = await AclEntry.distinct('resourceId', { + resourceType: 'agent', + principalType: 'user', + }); + + const toMigrate = await Agent.find({ + _id: { $nin: migratedIds }, + author: { $exists: true, $ne: null }, + }) + .select('_id id name author isCollaborative') + .lean(); + + expect(toMigrate).toHaveLength(1); + const agent = toMigrate[0] as Record; + expect(agent).toHaveProperty('_id'); + expect(agent).toHaveProperty('id', 'proj_agent'); + expect(agent).toHaveProperty('name', 'Field Test'); + expect(agent).toHaveProperty('author', 'user1'); + expect(agent).toHaveProperty('isCollaborative', true); + }); + }); + + describe('promptGroup migration pattern', () => { + it('should return only prompt groups WITHOUT user-type ACL entries', async () => { + const pg1 = await PromptGroup.create({ + name: 'Migrated PG', + author: 'user1', + category: 'code', + }); + await PromptGroup.create({ name: 'Unmigrated PG', author: 'user2', category: 'writing' }); + + await AclEntry.create({ + principalType: 'user', + principalId: new Types.ObjectId(), + resourceType: 'promptGroup', + resourceId: pg1._id, + }); + + const migratedIds = await AclEntry.distinct('resourceId', { + resourceType: 'promptGroup', + principalType: 'user', + }); + + const toMigrate = await PromptGroup.find({ + _id: { $nin: migratedIds }, + author: { $exists: true, $ne: null }, + }) + .select('_id name author authorName category') + .lean(); + + expect(toMigrate).toHaveLength(1); + expect((toMigrate[0] as Record).name).toBe('Unmigrated PG'); + }); + + it('should return correct projected fields for prompt groups', async () => { + await PromptGroup.create({ + name: 'PG Fields', + author: 'user1', + authorName: 'Test User', + category: 'marketing', + }); + + const migratedIds = await AclEntry.distinct('resourceId', { + resourceType: 'promptGroup', + principalType: 'user', + }); + + const 
toMigrate = await PromptGroup.find({ + _id: { $nin: migratedIds }, + author: { $exists: true, $ne: null }, + }) + .select('_id name author authorName category') + .lean(); + + expect(toMigrate).toHaveLength(1); + const pg = toMigrate[0] as Record; + expect(pg).toHaveProperty('_id'); + expect(pg).toHaveProperty('name', 'PG Fields'); + expect(pg).toHaveProperty('author', 'user1'); + expect(pg).toHaveProperty('authorName', 'Test User'); + expect(pg).toHaveProperty('category', 'marketing'); + }); + }); + + describe('cross-resource isolation', () => { + it('should independently track agent and promptGroup migrations', async () => { + const agent = await Agent.create({ + id: 'iso_agent', + name: 'Isolated Agent', + author: 'user1', + }); + await PromptGroup.create({ name: 'Isolated PG', author: 'user2' }); + + await AclEntry.create({ + principalType: 'user', + principalId: new Types.ObjectId(), + resourceType: 'agent', + resourceId: agent._id, + }); + + const migratedAgentIds = await AclEntry.distinct('resourceId', { + resourceType: 'agent', + principalType: 'user', + }); + const migratedPGIds = await AclEntry.distinct('resourceId', { + resourceType: 'promptGroup', + principalType: 'user', + }); + + const agentsToMigrate = await Agent.find({ + _id: { $nin: migratedAgentIds }, + author: { $exists: true, $ne: null }, + }).lean(); + + const pgsToMigrate = await PromptGroup.find({ + _id: { $nin: migratedPGIds }, + author: { $exists: true, $ne: null }, + }).lean(); + + expect(agentsToMigrate).toHaveLength(0); + expect(pgsToMigrate).toHaveLength(1); + }); + }); + + describe('scale behavior', () => { + it('should correctly handle many resources with partial migration', async () => { + const agents = []; + for (let i = 0; i < 20; i++) { + agents.push({ id: `agent_${i}`, name: `Agent ${i}`, author: `user_${i}` }); + } + const created = await Agent.insertMany(agents); + + const migrateEvens = created + .filter((_, i) => i % 2 === 0) + .map((a) => ({ + principalType: 'user', + principalId: new Types.ObjectId(), + resourceType: 'agent', + resourceId: a._id, + })); + await AclEntry.insertMany(migrateEvens); + + const migratedIds = await AclEntry.distinct('resourceId', { + resourceType: 'agent', + principalType: 'user', + }); + + const toMigrate = await Agent.find({ + _id: { $nin: migratedIds }, + author: { $exists: true, $ne: null }, + }).lean(); + + expect(toMigrate).toHaveLength(10); + const indices = toMigrate + .map((a) => parseInt(String(a.name).replace('Agent ', ''), 10)) + .sort((a, b) => a - b); + expect(indices).toEqual([1, 3, 5, 7, 9, 11, 13, 15, 17, 19]); + }); + }); +}); diff --git a/packages/data-schemas/misc/ferretdb/multiTenancy.ferretdb.spec.ts b/packages/data-schemas/misc/ferretdb/multiTenancy.ferretdb.spec.ts new file mode 100644 index 0000000000..a4d895f37a --- /dev/null +++ b/packages/data-schemas/misc/ferretdb/multiTenancy.ferretdb.spec.ts @@ -0,0 +1,649 @@ +import mongoose from 'mongoose'; +import { execSync } from 'child_process'; +import { + actionSchema, + agentSchema, + agentApiKeySchema, + agentCategorySchema, + assistantSchema, + balanceSchema, + bannerSchema, + conversationTagSchema, + convoSchema, + fileSchema, + keySchema, + messageSchema, + pluginAuthSchema, + presetSchema, + projectSchema, + promptSchema, + promptGroupSchema, + roleSchema, + sessionSchema, + shareSchema, + tokenSchema, + toolCallSchema, + transactionSchema, + userSchema, + memorySchema, + groupSchema, +} from '~/schema'; +import accessRoleSchema from '~/schema/accessRole'; +import aclEntrySchema from 
'~/schema/aclEntry'; +import mcpServerSchema from '~/schema/mcpServer'; + +/** + * FerretDB Multi-Tenancy Benchmark + * + * Validates whether FerretDB can handle LibreChat's multi-tenancy model + * at scale using database-per-org isolation via Mongoose useDb(). + * + * Phases: + * 1. useDb schema mapping β€” verifies per-org PostgreSQL schema creation and data isolation + * 2. Index initialization β€” validates all 29 collections + 97 indexes, tests for deadlocks + * 3. Scaling curve β€” measures catalog growth, init time, and query latency at 10/50/100 orgs + * 4. Write amplification β€” compares update cost on high-index vs zero-index collections + * 5. Shared-collection alternative β€” benchmarks orgId-discriminated shared collections + * + * Run: + * FERRETDB_URI="mongodb://ferretdb:ferretdb@127.0.0.1:27020/mt_bench" \ + * npx jest multiTenancy.ferretdb --testTimeout=600000 + * + * Env vars: + * FERRETDB_URI β€” Required. FerretDB connection string. + * PG_CONTAINER β€” Docker container name for psql (default: librechat-ferretdb-postgres-1) + * SCALE_TIERS β€” Comma-separated org counts (default: 10,50,100) + * WRITE_AMP_DOCS β€” Number of docs for write amp test (default: 200) + */ + +const FERRETDB_URI = process.env.FERRETDB_URI; +const describeIfFerretDB = FERRETDB_URI ? describe : describe.skip; + +const PG_CONTAINER = process.env.PG_CONTAINER || 'librechat-ferretdb-postgres-1'; +const PG_USER = 'ferretdb'; +const ORG_PREFIX = 'mt_bench_'; + +const DEFAULT_TIERS = [10, 50, 100]; +const SCALE_TIERS: number[] = process.env.SCALE_TIERS + ? process.env.SCALE_TIERS.split(',').map(Number) + : DEFAULT_TIERS; + +const WRITE_AMP_DOCS = parseInt(process.env.WRITE_AMP_DOCS || '200', 10); + +/** All 29 LibreChat schemas by Mongoose model name */ +const MODEL_SCHEMAS: Record = { + User: userSchema, + Token: tokenSchema, + Session: sessionSchema, + Balance: balanceSchema, + Conversation: convoSchema, + Message: messageSchema, + Agent: agentSchema, + AgentApiKey: agentApiKeySchema, + AgentCategory: agentCategorySchema, + MCPServer: mcpServerSchema, + Role: roleSchema, + Action: actionSchema, + Assistant: assistantSchema, + File: fileSchema, + Banner: bannerSchema, + Project: projectSchema, + Key: keySchema, + PluginAuth: pluginAuthSchema, + Transaction: transactionSchema, + Preset: presetSchema, + Prompt: promptSchema, + PromptGroup: promptGroupSchema, + ConversationTag: conversationTagSchema, + SharedLink: shareSchema, + ToolCall: toolCallSchema, + MemoryEntry: memorySchema, + AccessRole: accessRoleSchema, + AclEntry: aclEntrySchema, + Group: groupSchema, +}; + +const MODEL_COUNT = Object.keys(MODEL_SCHEMAS).length; + +/** Register all 29 models on a given Mongoose Connection */ +function registerModels(conn: mongoose.Connection): Record> { + const models: Record> = {}; + for (const [name, schema] of Object.entries(MODEL_SCHEMAS)) { + models[name] = conn.models[name] || conn.model(name, schema); + } + return models; +} + +/** Initialize one org database: create all collections then build all indexes sequentially */ +async function initializeOrgDb(conn: mongoose.Connection): Promise<{ + models: Record>; + durationMs: number; +}> { + const models = registerModels(conn); + const start = Date.now(); + for (const model of Object.values(models)) { + await model.createCollection(); + await model.createIndexes(); + } + return { models, durationMs: Date.now() - start }; +} + +/** Execute a psql command against the FerretDB PostgreSQL backend via docker exec */ +function psql(query: string): string { + 
try { + const escaped = query.replace(/"/g, '\\"'); + return execSync( + `docker exec ${PG_CONTAINER} psql -U ${PG_USER} -d postgres -t -A -c "${escaped}"`, + { encoding: 'utf-8', timeout: 30_000 }, + ).trim(); + } catch { + return ''; + } +} + +/** + * Snapshot of DocumentDB catalog + PostgreSQL system catalog sizes. + * FerretDB with DocumentDB stores all data in a single `documentdb_data` schema. + * Each MongoDB collection β†’ `documents_` + `retry_` table pair. + * The catalog lives in `documentdb_api_catalog.collections` and `.collection_indexes`. + */ +function catalogMetrics() { + return { + collections: parseInt(psql('SELECT count(*) FROM documentdb_api_catalog.collections'), 10) || 0, + databases: + parseInt( + psql('SELECT count(DISTINCT database_name) FROM documentdb_api_catalog.collections'), + 10, + ) || 0, + catalogIndexes: + parseInt(psql('SELECT count(*) FROM documentdb_api_catalog.collection_indexes'), 10) || 0, + dataTables: + parseInt( + psql( + "SELECT count(*) FROM information_schema.tables WHERE table_schema = 'documentdb_data'", + ), + 10, + ) || 0, + pgClassTotal: parseInt(psql('SELECT count(*) FROM pg_class'), 10) || 0, + pgStatRows: parseInt(psql('SELECT count(*) FROM pg_statistic'), 10) || 0, + }; +} + +/** Measure point-query latency over N iterations and return percentile stats */ +async function measureLatency( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + model: mongoose.Model, + filter: Record, + iterations = 50, +) { + await model.findOne(filter).lean(); + + const times: number[] = []; + for (let i = 0; i < iterations; i++) { + const t0 = process.hrtime.bigint(); + await model.findOne(filter).lean(); + times.push(Number(process.hrtime.bigint() - t0) / 1e6); + } + + times.sort((a, b) => a - b); + return { + min: times[0], + max: times[times.length - 1], + median: times[Math.floor(times.length / 2)], + p95: times[Math.floor(times.length * 0.95)], + avg: times.reduce((s, v) => s + v, 0) / times.length, + }; +} + +function fmt(n: number): string { + return n.toFixed(2); +} + +describeIfFerretDB('FerretDB Multi-Tenancy Benchmark', () => { + const createdDbs: string[] = []; + + beforeAll(async () => { + await mongoose.connect(FERRETDB_URI as string, { autoIndex: false }); + }); + + afterAll(async () => { + for (const db of createdDbs) { + try { + await mongoose.connection.useDb(db, { useCache: false }).dropDatabase(); + } catch { + /* best-effort cleanup */ + } + } + try { + await mongoose.connection.dropDatabase(); + } catch { + /* best-effort */ + } + await mongoose.disconnect(); + }, 600_000); + + // ─── PHASE 1: DATABASE-PER-ORG SCHEMA MAPPING ──────────────────────────── + + describe('Phase 1: useDb Schema Mapping', () => { + const org1Db = `${ORG_PREFIX}iso_1`; + const org2Db = `${ORG_PREFIX}iso_2`; + let org1Models: Record>; + let org2Models: Record>; + + beforeAll(() => { + createdDbs.push(org1Db, org2Db); + }); + + it('creates separate databases with all 29 collections via useDb()', async () => { + const c1 = mongoose.connection.useDb(org1Db, { useCache: true }); + const c2 = mongoose.connection.useDb(org2Db, { useCache: true }); + + const r1 = await initializeOrgDb(c1); + const r2 = await initializeOrgDb(c2); + org1Models = r1.models; + org2Models = r2.models; + + console.log(`[Phase 1] org1 init: ${r1.durationMs}ms | org2 init: ${r2.durationMs}ms`); + + expect(Object.keys(org1Models)).toHaveLength(MODEL_COUNT); + expect(Object.keys(org2Models)).toHaveLength(MODEL_COUNT); + }, 120_000); + + it('maps each useDb database to a 
separate entry in the DocumentDB catalog', () => { + const raw = psql( + `SELECT database_name FROM documentdb_api_catalog.collections WHERE database_name LIKE '${ORG_PREFIX}%' GROUP BY database_name ORDER BY database_name`, + ); + const dbNames = raw.split('\n').filter(Boolean); + console.log('[Phase 1] DocumentDB databases:', dbNames); + + expect(dbNames).toContain(org1Db); + expect(dbNames).toContain(org2Db); + + const perDb = psql( + `SELECT database_name, count(*) FROM documentdb_api_catalog.collections WHERE database_name LIKE '${ORG_PREFIX}%' GROUP BY database_name ORDER BY database_name`, + ); + console.log('[Phase 1] Collections per database:\n' + perDb); + }); + + it('isolates data between org databases', async () => { + await org1Models.User.create({ + name: 'Org1 User', + email: 'u@org1.test', + username: 'org1user', + }); + await org2Models.User.create({ + name: 'Org2 User', + email: 'u@org2.test', + username: 'org2user', + }); + + const u1 = await org1Models.User.find({}).lean(); + const u2 = await org2Models.User.find({}).lean(); + + expect(u1).toHaveLength(1); + expect(u2).toHaveLength(1); + expect((u1[0] as Record).email).toBe('u@org1.test'); + expect((u2[0] as Record).email).toBe('u@org2.test'); + }, 30_000); + }); + + // ─── PHASE 2: INDEX INITIALIZATION ──────────────────────────────────────── + + describe('Phase 2: Index Initialization', () => { + const seqDb = `${ORG_PREFIX}idx_seq`; + + beforeAll(() => { + createdDbs.push(seqDb); + }); + + it('creates all indexes sequentially and reports per-model breakdown', async () => { + const conn = mongoose.connection.useDb(seqDb, { useCache: true }); + const models = registerModels(conn); + + const stats: { name: string; ms: number; idxCount: number }[] = []; + for (const [name, model] of Object.entries(models)) { + const t0 = Date.now(); + await model.createCollection(); + await model.createIndexes(); + const idxs = await model.collection.indexes(); + stats.push({ name, ms: Date.now() - t0, idxCount: idxs.length - 1 }); + } + + const totalMs = stats.reduce((s, r) => s + r.ms, 0); + const totalIdx = stats.reduce((s, r) => s + r.idxCount, 0); + + console.log(`[Phase 2] Sequential: ${totalMs}ms total, ${totalIdx} custom indexes`); + console.log('[Phase 2] Slowest 10:'); + for (const s of stats.sort((a, b) => b.ms - a.ms).slice(0, 10)) { + console.log(` ${s.name.padEnd(20)} ${String(s.idxCount).padStart(3)} indexes ${s.ms}ms`); + } + + expect(totalIdx).toBeGreaterThanOrEqual(90); + }, 120_000); + + it('tests concurrent index creation for deadlock risk', async () => { + const concDb = `${ORG_PREFIX}idx_conc`; + createdDbs.push(concDb); + const conn = mongoose.connection.useDb(concDb, { useCache: false }); + const models = registerModels(conn); + + for (const model of Object.values(models)) { + await model.createCollection(); + } + + const t0 = Date.now(); + try { + await Promise.all(Object.values(models).map((m) => m.createIndexes())); + console.log(`[Phase 2] Concurrent: ${Date.now() - t0}ms β€” no deadlock`); + } catch (err) { + console.warn( + `[Phase 2] Concurrent: DEADLOCKED after ${Date.now() - t0}ms β€” ${(err as Error).message}`, + ); + } + }, 120_000); + + it('verifies sparse, partial, and TTL index types on FerretDB', async () => { + const conn = mongoose.connection.useDb(seqDb, { useCache: true }); + + const userIdxs = await conn.model('User').collection.indexes(); + const sparseCount = userIdxs.filter((i: Record) => i.sparse).length; + const ttlCount = userIdxs.filter( + (i: Record) => i.expireAfterSeconds !== 
undefined, + ).length; + console.log( + `[Phase 2] User: ${userIdxs.length} total, ${sparseCount} sparse, ${ttlCount} TTL`, + ); + expect(sparseCount).toBeGreaterThanOrEqual(8); + + const fileIdxs = await conn.model('File').collection.indexes(); + const partialFile = fileIdxs.find( + (i: Record) => i.partialFilterExpression != null, + ); + console.log(`[Phase 2] File partialFilterExpression: ${partialFile ? 'YES' : 'NO'}`); + expect(partialFile).toBeDefined(); + + const groupIdxs = await conn.model('Group').collection.indexes(); + const sparseGroup = groupIdxs.find((i: Record) => i.sparse); + const partialGroup = groupIdxs.find( + (i: Record) => i.partialFilterExpression != null, + ); + console.log( + `[Phase 2] Group: sparse=${sparseGroup ? 'YES' : 'NO'}, partial=${partialGroup ? 'YES' : 'NO'}`, + ); + expect(sparseGroup).toBeDefined(); + expect(partialGroup).toBeDefined(); + }, 60_000); + }); + + // ─── PHASE 3: SCALING CURVE ─────────────────────────────────────────────── + + describe('Phase 3: Scaling Curve', () => { + interface TierResult { + tier: number; + batchMs: number; + avgPerOrg: number; + catalog: ReturnType; + latency: Awaited>; + } + + const tierResults: TierResult[] = []; + let orgsCreated = 0; + let firstOrgConn: mongoose.Connection | null = null; + + beforeAll(() => { + const baseline = catalogMetrics(); + console.log( + `[Phase 3] Baseline β€” collections: ${baseline.collections}, ` + + `databases: ${baseline.databases}, catalog indexes: ${baseline.catalogIndexes}, ` + + `data tables: ${baseline.dataTables}, pg_class: ${baseline.pgClassTotal}`, + ); + }); + + it.each(SCALE_TIERS)( + 'scales to %i orgs', + async (target) => { + const t0 = Date.now(); + + for (let i = orgsCreated + 1; i <= target; i++) { + const dbName = `${ORG_PREFIX}s${i}`; + createdDbs.push(dbName); + + const conn = mongoose.connection.useDb(dbName, { useCache: i === 1 }); + if (i === 1) { + firstOrgConn = conn; + } + + const models = registerModels(conn); + for (const model of Object.values(models)) { + await model.createCollection(); + await model.createIndexes(); + } + + if (i === 1) { + await models.User.create({ + name: 'Latency Probe', + email: 'probe@scale.test', + username: 'probe', + }); + } + + if (i % 10 === 0) { + process.stdout.write(` ${i}/${target} orgs\n`); + } + } + + const batchMs = Date.now() - t0; + const batchSize = target - orgsCreated; + orgsCreated = target; + + const lat = await measureLatency(firstOrgConn!.model('User'), { + email: 'probe@scale.test', + }); + const cat = catalogMetrics(); + + tierResults.push({ + tier: target, + batchMs, + avgPerOrg: batchSize > 0 ? Math.round(batchMs / batchSize) : 0, + catalog: cat, + latency: lat, + }); + + console.log(`\n[Phase 3] === ${target} orgs ===`); + console.log( + ` Init: ${batchMs}ms total (${batchSize > 0 ? 
Math.round(batchMs / batchSize) : 0}ms/org, batch=${batchSize})`, + ); + console.log( + ` Query: avg=${fmt(lat.avg)}ms median=${fmt(lat.median)}ms p95=${fmt(lat.p95)}ms`, + ); + console.log( + ` Catalog: ${cat.collections} collections, ${cat.catalogIndexes} indexes, ` + + `${cat.dataTables} data tables, pg_class=${cat.pgClassTotal}`, + ); + + expect(cat.collections).toBeGreaterThan(0); + }, + 600_000, + ); + + afterAll(() => { + if (tierResults.length === 0) { + return; + } + + const hdr = [ + 'Orgs', + 'Colls', + 'CatIdx', + 'DataTbls', + 'pg_class', + 'Init/org', + 'Qry avg', + 'Qry p95', + ]; + const w = [8, 10, 10, 10, 12, 12, 12, 12]; + + console.log('\n[Phase 3] SCALING SUMMARY'); + console.log('─'.repeat(w.reduce((a, b) => a + b))); + console.log(hdr.map((h, i) => h.padEnd(w[i])).join('')); + console.log('─'.repeat(w.reduce((a, b) => a + b))); + + for (const r of tierResults) { + const row = [ + String(r.tier), + String(r.catalog.collections), + String(r.catalog.catalogIndexes), + String(r.catalog.dataTables), + String(r.catalog.pgClassTotal), + `${r.avgPerOrg}ms`, + `${fmt(r.latency.avg)}ms`, + `${fmt(r.latency.p95)}ms`, + ]; + console.log(row.map((v, i) => v.padEnd(w[i])).join('')); + } + console.log('─'.repeat(w.reduce((a, b) => a + b))); + }); + }); + + // ─── PHASE 4: WRITE AMPLIFICATION ──────────────────────────────────────── + + describe('Phase 4: Write Amplification', () => { + it('compares update cost: high-index (User, 11+ idx) vs zero-index collection', async () => { + const db = `${ORG_PREFIX}wamp`; + createdDbs.push(db); + const conn = mongoose.connection.useDb(db, { useCache: false }); + + const HighIdx = conn.model('User', userSchema); + await HighIdx.createCollection(); + await HighIdx.createIndexes(); + + const bareSchema = new mongoose.Schema({ name: String, email: String, ts: Date }); + const LowIdx = conn.model('BareDoc', bareSchema); + await LowIdx.createCollection(); + + const N = WRITE_AMP_DOCS; + + await HighIdx.insertMany( + Array.from({ length: N }, (_, i) => ({ + name: `U${i}`, + email: `u${i}@wamp.test`, + username: `u${i}`, + })), + ); + await LowIdx.insertMany( + Array.from({ length: N }, (_, i) => ({ + name: `U${i}`, + email: `u${i}@wamp.test`, + ts: new Date(), + })), + ); + + const walBefore = psql('SELECT wal_bytes FROM pg_stat_wal'); + + const highStart = Date.now(); + for (let i = 0; i < N; i++) { + await HighIdx.updateOne({ email: `u${i}@wamp.test` }, { $set: { name: `X${i}` } }); + } + const highMs = Date.now() - highStart; + + const walMid = psql('SELECT wal_bytes FROM pg_stat_wal'); + + const lowStart = Date.now(); + for (let i = 0; i < N; i++) { + await LowIdx.updateOne({ email: `u${i}@wamp.test` }, { $set: { name: `X${i}` } }); + } + const lowMs = Date.now() - lowStart; + + const walAfter = psql('SELECT wal_bytes FROM pg_stat_wal'); + + console.log(`\n[Phase 4] Write Amplification (${N} updates each)`); + console.log(` High-index (User, 11+ idx): ${highMs}ms (${fmt(highMs / N)}ms/op)`); + console.log(` Zero-index (bare): ${lowMs}ms (${fmt(lowMs / N)}ms/op)`); + console.log(` Time ratio: ${fmt(highMs / Math.max(lowMs, 1))}x`); + + if (walBefore && walMid && walAfter) { + const wHigh = BigInt(walMid) - BigInt(walBefore); + const wLow = BigInt(walAfter) - BigInt(walMid); + console.log(` WAL: high-idx=${wHigh} bytes, bare=${wLow} bytes`); + if (wLow > BigInt(0)) { + console.log(` WAL ratio: ${fmt(Number(wHigh) / Number(wLow))}x`); + } + } + + expect(highMs).toBeGreaterThan(0); + expect(lowMs).toBeGreaterThan(0); + }, 300_000); + }); + + // ─── 
PHASE 5: SHARED-COLLECTION ALTERNATIVE ────────────────────────────── + + describe('Phase 5: Shared Collection Alternative', () => { + it('benchmarks shared collection with orgId discriminator field', async () => { + const db = `${ORG_PREFIX}shared`; + createdDbs.push(db); + const conn = mongoose.connection.useDb(db, { useCache: false }); + + const sharedSchema = new mongoose.Schema({ + orgId: { type: String, required: true, index: true }, + name: String, + email: String, + username: String, + provider: { type: String, default: 'local' }, + role: { type: String, default: 'USER' }, + }); + sharedSchema.index({ orgId: 1, email: 1 }, { unique: true }); + + const Shared = conn.model('SharedUser', sharedSchema); + await Shared.createCollection(); + await Shared.createIndexes(); + + const ORG_N = 100; + const USERS_PER = 50; + + const docs = []; + for (let o = 0; o < ORG_N; o++) { + for (let u = 0; u < USERS_PER; u++) { + docs.push({ + orgId: `org_${o}`, + name: `User ${u}`, + email: `u${u}@o${o}.test`, + username: `u${u}_o${o}`, + }); + } + } + + const insertT0 = Date.now(); + await Shared.insertMany(docs, { ordered: false }); + const insertMs = Date.now() - insertT0; + + const totalDocs = ORG_N * USERS_PER; + console.log(`\n[Phase 5] Shared collection: ${totalDocs} docs inserted in ${insertMs}ms`); + + const pointLat = await measureLatency(Shared, { + orgId: 'org_50', + email: 'u25@o50.test', + }); + console.log( + ` Point query: avg=${fmt(pointLat.avg)}ms median=${fmt(pointLat.median)}ms p95=${fmt(pointLat.p95)}ms`, + ); + + const listT0 = Date.now(); + const orgDocs = await Shared.find({ orgId: 'org_50' }).lean(); + const listMs = Date.now() - listT0; + console.log(` List org users (${orgDocs.length} docs): ${listMs}ms`); + + const countT0 = Date.now(); + const count = await Shared.countDocuments({ orgId: 'org_50' }); + const countMs = Date.now() - countT0; + console.log(` Count org users: ${count} in ${countMs}ms`); + + const cat = catalogMetrics(); + console.log( + ` Catalog: ${cat.collections} collections, ${cat.catalogIndexes} indexes, ` + + `${cat.dataTables} data tables (shared approach = 1 extra db, minimal overhead)`, + ); + + expect(orgDocs).toHaveLength(USERS_PER); + }, 120_000); + }); +}); diff --git a/packages/data-schemas/misc/ferretdb/orgOperations.ferretdb.spec.ts b/packages/data-schemas/misc/ferretdb/orgOperations.ferretdb.spec.ts new file mode 100644 index 0000000000..fdea2eb8fc --- /dev/null +++ b/packages/data-schemas/misc/ferretdb/orgOperations.ferretdb.spec.ts @@ -0,0 +1,675 @@ +import mongoose, { Schema, type Connection, type Model } from 'mongoose'; +import { + actionSchema, + agentSchema, + agentApiKeySchema, + agentCategorySchema, + assistantSchema, + balanceSchema, + bannerSchema, + conversationTagSchema, + convoSchema, + fileSchema, + keySchema, + messageSchema, + pluginAuthSchema, + presetSchema, + projectSchema, + promptSchema, + promptGroupSchema, + roleSchema, + sessionSchema, + shareSchema, + tokenSchema, + toolCallSchema, + transactionSchema, + userSchema, + memorySchema, + groupSchema, +} from '~/schema'; +import accessRoleSchema from '~/schema/accessRole'; +import mcpServerSchema from '~/schema/mcpServer'; +import aclEntrySchema from '~/schema/aclEntry'; +import { initializeOrgCollections, createIndexesWithRetry, retryWithBackoff } from '~/utils/retry'; + +/** + * Production operations tests for FerretDB multi-tenancy: + * 1. Retry utility under simulated and real deadlock conditions + * 2. 
Programmatic per-org backup/restore (driver-level, no mongodump) + * 3. Schema migration across existing org databases + * + * Run: + * FERRETDB_URI="mongodb://ferretdb:ferretdb@127.0.0.1:27020/ops_test" \ + * npx jest orgOperations.ferretdb --testTimeout=300000 + */ + +const FERRETDB_URI = process.env.FERRETDB_URI; +const describeIfFerretDB = FERRETDB_URI ? describe : describe.skip; + +const DB_PREFIX = 'ops_test_'; + +const MODEL_SCHEMAS: Record = { + User: userSchema, + Token: tokenSchema, + Session: sessionSchema, + Balance: balanceSchema, + Conversation: convoSchema, + Message: messageSchema, + Agent: agentSchema, + AgentApiKey: agentApiKeySchema, + AgentCategory: agentCategorySchema, + MCPServer: mcpServerSchema, + Role: roleSchema, + Action: actionSchema, + Assistant: assistantSchema, + File: fileSchema, + Banner: bannerSchema, + Project: projectSchema, + Key: keySchema, + PluginAuth: pluginAuthSchema, + Transaction: transactionSchema, + Preset: presetSchema, + Prompt: promptSchema, + PromptGroup: promptGroupSchema, + ConversationTag: conversationTagSchema, + SharedLink: shareSchema, + ToolCall: toolCallSchema, + MemoryEntry: memorySchema, + AccessRole: accessRoleSchema, + AclEntry: aclEntrySchema, + Group: groupSchema, +}; + +const MODEL_COUNT = Object.keys(MODEL_SCHEMAS).length; + +function registerModels(conn: Connection): Record> { + const models: Record> = {}; + for (const [name, schema] of Object.entries(MODEL_SCHEMAS)) { + models[name] = conn.models[name] || conn.model(name, schema); + } + return models; +} + +// ─── BACKUP/RESTORE UTILITIES ─────────────────────────────────────────────── + +interface OrgBackup { + orgId: string; + timestamp: Date; + collections: Record; +} + +/** Dump all collections from an org database to an in-memory structure */ +async function backupOrg(conn: Connection, orgId: string): Promise { + const collectionNames = (await conn.db!.listCollections().toArray()).map((c) => c.name); + const collections: Record = {}; + + for (const name of collectionNames) { + if (name.startsWith('system.')) { + continue; + } + const docs = await conn.db!.collection(name).find({}).toArray(); + collections[name] = docs; + } + + return { orgId, timestamp: new Date(), collections }; +} + +/** Restore collections from a backup into a target connection */ +async function restoreOrg( + conn: Connection, + backup: OrgBackup, +): Promise<{ collectionsRestored: number; docsRestored: number }> { + let docsRestored = 0; + + for (const [name, docs] of Object.entries(backup.collections)) { + if (docs.length === 0) { + continue; + } + const collection = conn.db!.collection(name); + await collection.insertMany(docs as Array>); + docsRestored += docs.length; + } + + return { collectionsRestored: Object.keys(backup.collections).length, docsRestored }; +} + +// ─── MIGRATION UTILITIES ──────────────────────────────────────────────────── + +interface MigrationResult { + orgId: string; + newCollections: string[]; + indexResults: Array<{ model: string; created: boolean; ms: number }>; + totalMs: number; +} + +/** Migrate a single org: ensure all collections exist and all indexes are current */ +async function migrateOrg( + conn: Connection, + orgId: string, + schemas: Record, +): Promise { + const t0 = Date.now(); + const models = registerModels(conn); + const existingCollections = new Set( + (await conn.db!.listCollections().toArray()).map((c) => c.name), + ); + + const newCollections: string[] = []; + const indexResults: Array<{ model: string; created: boolean; ms: number }> = []; + + 
for (const [name, model] of Object.entries(models)) { + const collName = model.collection.collectionName; + const isNew = !existingCollections.has(collName); + if (isNew) { + newCollections.push(name); + } + + const mt0 = Date.now(); + await model.createCollection(); + await createIndexesWithRetry(model); + indexResults.push({ model: name, created: isNew, ms: Date.now() - mt0 }); + } + + return { orgId, newCollections, indexResults, totalMs: Date.now() - t0 }; +} + +/** Migrate all orgs in sequence with progress reporting */ +async function migrateAllOrgs( + baseConn: Connection, + orgIds: string[], + schemas: Record, + onProgress?: (completed: number, total: number, result: MigrationResult) => void, +): Promise { + const results: MigrationResult[] = []; + + for (let i = 0; i < orgIds.length; i++) { + const orgId = orgIds[i]; + const conn = baseConn.useDb(`${DB_PREFIX}org_${orgId}`, { useCache: true }); + const result = await migrateOrg(conn, orgId, schemas); + results.push(result); + if (onProgress) { + onProgress(i + 1, orgIds.length, result); + } + } + + return results; +} + +// ─── TESTS ────────────────────────────────────────────────────────────────── + +describeIfFerretDB('Org Operations (Production)', () => { + const createdDbs: string[] = []; + let baseConn: Connection; + + beforeAll(async () => { + baseConn = await mongoose.createConnection(FERRETDB_URI as string).asPromise(); + }); + + afterAll(async () => { + for (const db of createdDbs) { + try { + await baseConn.useDb(db, { useCache: false }).dropDatabase(); + } catch { + /* best-effort */ + } + } + await baseConn.close(); + }, 120_000); + + // ─── RETRY UTILITY ────────────────────────────────────────────────────── + + describe('retryWithBackoff', () => { + it('succeeds on first attempt when no error', async () => { + let calls = 0; + const result = await retryWithBackoff(async () => { + calls++; + return 'ok'; + }, 'test-op'); + expect(result).toBe('ok'); + expect(calls).toBe(1); + }); + + it('retries on deadlock error and eventually succeeds', async () => { + let calls = 0; + const result = await retryWithBackoff( + async () => { + calls++; + if (calls < 3) { + throw new Error('deadlock detected'); + } + return 'recovered'; + }, + 'deadlock-test', + { baseDelayMs: 10, jitter: false }, + ); + + expect(result).toBe('recovered'); + expect(calls).toBe(3); + }); + + it('does not retry on non-retryable errors', async () => { + let calls = 0; + await expect( + retryWithBackoff( + async () => { + calls++; + throw new Error('validation failed'); + }, + 'non-retryable', + { baseDelayMs: 10 }, + ), + ).rejects.toThrow('validation failed'); + expect(calls).toBe(1); + }); + + it('exhausts max attempts and throws', async () => { + let calls = 0; + await expect( + retryWithBackoff( + async () => { + calls++; + throw new Error('deadlock detected'); + }, + 'exhausted', + { maxAttempts: 3, baseDelayMs: 10, jitter: false }, + ), + ).rejects.toThrow('deadlock'); + expect(calls).toBe(3); + }); + + it('respects maxDelayMs cap', async () => { + const delays: number[] = []; + let calls = 0; + + await retryWithBackoff( + async () => { + calls++; + if (calls < 4) { + throw new Error('deadlock detected'); + } + return 'ok'; + }, + 'delay-cap', + { + baseDelayMs: 100, + maxDelayMs: 250, + jitter: false, + onRetry: (_err, _attempt, delay) => delays.push(delay), + }, + ); + + expect(delays[0]).toBe(100); + expect(delays[1]).toBe(200); + expect(delays[2]).toBe(250); + }); + }); + + // ─── REAL DEADLOCK RETRY 
──────────────────────────────────────────────── + + describe('initializeOrgCollections with retry', () => { + it('provisions 5 orgs sequentially using the production utility', async () => { + const orgIds = ['retry_1', 'retry_2', 'retry_3', 'retry_4', 'retry_5']; + const results: Array<{ orgId: string; ms: number; models: number }> = []; + + for (const orgId of orgIds) { + const dbName = `${DB_PREFIX}org_${orgId}`; + createdDbs.push(dbName); + const conn = baseConn.useDb(dbName, { useCache: true }); + const models = registerModels(conn); + + const { totalMs } = await initializeOrgCollections(models, { + baseDelayMs: 50, + maxAttempts: 5, + }); + results.push({ orgId, ms: totalMs, models: Object.keys(models).length }); + } + + const totalMs = results.reduce((s, r) => s + r.ms, 0); + console.log(`[Retry] 5 orgs provisioned in ${totalMs}ms:`); + for (const r of results) { + console.log(` ${r.orgId}: ${r.ms}ms (${r.models} models)`); + } + + expect(results.every((r) => r.models === MODEL_COUNT)).toBe(true); + }, 120_000); + }); + + // ─── BACKUP/RESTORE ───────────────────────────────────────────────────── + + describe('per-org backup and restore', () => { + const sourceOrg = 'backup_src'; + const targetOrg = 'backup_dst'; + + beforeAll(async () => { + const srcDb = `${DB_PREFIX}org_${sourceOrg}`; + createdDbs.push(srcDb, `${DB_PREFIX}org_${targetOrg}`); + const srcConn = baseConn.useDb(srcDb, { useCache: true }); + const models = registerModels(srcConn); + await initializeOrgCollections(models); + + await models.User.create([ + { name: 'Alice', email: 'alice@backup.test', username: 'alice' }, + { name: 'Bob', email: 'bob@backup.test', username: 'bob' }, + { name: 'Charlie', email: 'charlie@backup.test', username: 'charlie' }, + ]); + + await models.Conversation.create([ + { + conversationId: 'conv_1', + user: 'alice_id', + title: 'Test conversation 1', + endpoint: 'openAI', + model: 'gpt-4', + }, + { + conversationId: 'conv_2', + user: 'bob_id', + title: 'Test conversation 2', + endpoint: 'openAI', + model: 'gpt-4', + }, + ]); + + await models.Message.create([ + { + messageId: 'msg_1', + conversationId: 'conv_1', + user: 'alice_id', + sender: 'user', + text: 'Hello world', + isCreatedByUser: true, + }, + { + messageId: 'msg_2', + conversationId: 'conv_1', + user: 'alice_id', + sender: 'GPT-4', + text: 'Hi there!', + isCreatedByUser: false, + }, + ]); + + const agentId = new mongoose.Types.ObjectId(); + await models.Agent.create({ + id: `agent_${agentId}`, + name: 'Test Agent', + author: new mongoose.Types.ObjectId(), + description: 'A test agent for backup', + provider: 'openAI', + model: 'gpt-4', + }); + }, 60_000); + + it('backs up all collections from the source org', async () => { + const srcConn = baseConn.useDb(`${DB_PREFIX}org_${sourceOrg}`, { useCache: true }); + const backup = await backupOrg(srcConn, sourceOrg); + + console.log(`[Backup] ${sourceOrg}:`); + console.log(` Timestamp: ${backup.timestamp.toISOString()}`); + console.log(` Collections: ${Object.keys(backup.collections).length}`); + let totalDocs = 0; + for (const [name, docs] of Object.entries(backup.collections)) { + if (docs.length > 0) { + console.log(` ${name}: ${docs.length} docs`); + totalDocs += docs.length; + } + } + console.log(` Total documents: ${totalDocs}`); + + expect(Object.keys(backup.collections).length).toBeGreaterThanOrEqual(4); + expect(backup.collections['users']?.length).toBe(3); + expect(backup.collections['conversations']?.length).toBe(2); + expect(backup.collections['messages']?.length).toBe(2); + 
}, 30_000); + + it('restores backup to a fresh org database', async () => { + const srcConn = baseConn.useDb(`${DB_PREFIX}org_${sourceOrg}`, { useCache: true }); + const backup = await backupOrg(srcConn, sourceOrg); + + const dstConn = baseConn.useDb(`${DB_PREFIX}org_${targetOrg}`, { useCache: true }); + const dstModels = registerModels(dstConn); + await initializeOrgCollections(dstModels); + + const { collectionsRestored, docsRestored } = await restoreOrg(dstConn, backup); + + console.log( + `[Restore] ${targetOrg}: ${collectionsRestored} collections, ${docsRestored} docs`, + ); + + expect(docsRestored).toBeGreaterThanOrEqual(7); + }, 60_000); + + it('verifies restored data matches source exactly', async () => { + const srcConn = baseConn.useDb(`${DB_PREFIX}org_${sourceOrg}`, { useCache: true }); + const dstConn = baseConn.useDb(`${DB_PREFIX}org_${targetOrg}`, { useCache: true }); + + const srcUsers = await srcConn.db!.collection('users').find({}).sort({ email: 1 }).toArray(); + const dstUsers = await dstConn.db!.collection('users').find({}).sort({ email: 1 }).toArray(); + + expect(dstUsers.length).toBe(srcUsers.length); + for (let i = 0; i < srcUsers.length; i++) { + expect(dstUsers[i].name).toBe(srcUsers[i].name); + expect(dstUsers[i].email).toBe(srcUsers[i].email); + expect(dstUsers[i]._id.toString()).toBe(srcUsers[i]._id.toString()); + } + + const srcMsgs = await srcConn + .db!.collection('messages') + .find({}) + .sort({ messageId: 1 }) + .toArray(); + const dstMsgs = await dstConn + .db!.collection('messages') + .find({}) + .sort({ messageId: 1 }) + .toArray(); + + expect(dstMsgs.length).toBe(srcMsgs.length); + for (let i = 0; i < srcMsgs.length; i++) { + expect(dstMsgs[i].messageId).toBe(srcMsgs[i].messageId); + expect(dstMsgs[i].text).toBe(srcMsgs[i].text); + expect(dstMsgs[i]._id.toString()).toBe(srcMsgs[i]._id.toString()); + } + + const srcConvos = await srcConn + .db!.collection('conversations') + .find({}) + .sort({ conversationId: 1 }) + .toArray(); + const dstConvos = await dstConn + .db!.collection('conversations') + .find({}) + .sort({ conversationId: 1 }) + .toArray(); + + expect(dstConvos.length).toBe(srcConvos.length); + for (let i = 0; i < srcConvos.length; i++) { + expect(dstConvos[i].conversationId).toBe(srcConvos[i].conversationId); + expect(dstConvos[i].title).toBe(srcConvos[i].title); + } + + console.log('[Restore] Data integrity verified: _ids, fields, and counts match exactly'); + }, 30_000); + + it('verifies BSON type preservation (ObjectId, Date, Number)', async () => { + const dstConn = baseConn.useDb(`${DB_PREFIX}org_${targetOrg}`, { useCache: true }); + + const user = await dstConn.db!.collection('users').findOne({ email: 'alice@backup.test' }); + expect(user).toBeDefined(); + expect(user!._id).toBeInstanceOf(mongoose.Types.ObjectId); + expect(user!.createdAt).toBeInstanceOf(Date); + + const agent = await dstConn.db!.collection('agents').findOne({}); + expect(agent).toBeDefined(); + expect(agent!._id).toBeInstanceOf(mongoose.Types.ObjectId); + expect(typeof agent!.name).toBe('string'); + + console.log('[Restore] BSON types preserved: ObjectId, Date, String all correct'); + }); + + it('measures backup and restore performance', async () => { + const srcConn = baseConn.useDb(`${DB_PREFIX}org_${sourceOrg}`, { useCache: true }); + + const backupStart = Date.now(); + const backup = await backupOrg(srcConn, sourceOrg); + const backupMs = Date.now() - backupStart; + + const freshDb = `${DB_PREFIX}org_perf_restore`; + createdDbs.push(freshDb); + const freshConn = 
baseConn.useDb(freshDb, { useCache: false }); + const freshModels = registerModels(freshConn); + await initializeOrgCollections(freshModels); + + const restoreStart = Date.now(); + await restoreOrg(freshConn, backup); + const restoreMs = Date.now() - restoreStart; + + const totalDocs = Object.values(backup.collections).reduce((s, d) => s + d.length, 0); + console.log( + `[Perf] Backup: ${backupMs}ms (${totalDocs} docs across ${Object.keys(backup.collections).length} collections)`, + ); + console.log(`[Perf] Restore: ${restoreMs}ms`); + + expect(backupMs).toBeLessThan(5000); + expect(restoreMs).toBeLessThan(5000); + }, 60_000); + }); + + // ─── SCHEMA MIGRATION ────────────────────────────────────────────────── + + describe('schema migration across orgs', () => { + const migrationOrgs = ['mig_1', 'mig_2', 'mig_3', 'mig_4', 'mig_5']; + + beforeAll(async () => { + for (const orgId of migrationOrgs) { + const dbName = `${DB_PREFIX}org_${orgId}`; + createdDbs.push(dbName); + const conn = baseConn.useDb(dbName, { useCache: true }); + const models = registerModels(conn); + await initializeOrgCollections(models); + + await models.User.create({ + name: `User ${orgId}`, + email: `user@${orgId}.test`, + username: orgId, + }); + } + }, 120_000); + + it('createIndexes is idempotent (no-op for existing indexes)', async () => { + const conn = baseConn.useDb(`${DB_PREFIX}org_mig_1`, { useCache: true }); + const models = registerModels(conn); + + const beforeIndexes = await models.User.collection.indexes(); + + const t0 = Date.now(); + await initializeOrgCollections(models); + const ms = Date.now() - t0; + + const afterIndexes = await models.User.collection.indexes(); + + expect(afterIndexes.length).toBe(beforeIndexes.length); + console.log( + `[Migration] Idempotent re-init: ${ms}ms (indexes unchanged: ${beforeIndexes.length})`, + ); + }, 60_000); + + it('adds a new collection to all existing orgs', async () => { + const newSchema = new Schema( + { + orgId: { type: String, index: true }, + eventType: { type: String, required: true, index: true }, + payload: Schema.Types.Mixed, + userId: { type: Schema.Types.ObjectId, index: true }, + }, + { timestamps: true }, + ); + newSchema.index({ orgId: 1, eventType: 1, createdAt: -1 }); + + for (const orgId of migrationOrgs) { + const conn = baseConn.useDb(`${DB_PREFIX}org_${orgId}`, { useCache: true }); + const AuditLog = conn.models['AuditLog'] || conn.model('AuditLog', newSchema); + await AuditLog.createCollection(); + await createIndexesWithRetry(AuditLog); + } + + for (const orgId of migrationOrgs) { + const conn = baseConn.useDb(`${DB_PREFIX}org_${orgId}`, { useCache: true }); + const collections = (await conn.db!.listCollections().toArray()).map((c) => c.name); + expect(collections).toContain('auditlogs'); + + const indexes = await conn.db!.collection('auditlogs').indexes(); + expect(indexes.length).toBeGreaterThanOrEqual(4); + } + + console.log( + `[Migration] New collection 'auditlogs' added to ${migrationOrgs.length} orgs with 4+ indexes`, + ); + }, 60_000); + + it('adds a new index to an existing collection across all orgs', async () => { + const indexSpec = { username: 1, createdAt: -1 }; + + for (const orgId of migrationOrgs) { + const conn = baseConn.useDb(`${DB_PREFIX}org_${orgId}`, { useCache: true }); + await retryWithBackoff( + () => conn.db!.collection('users').createIndex(indexSpec, { background: true }), + `createIndex(users, username+createdAt) for ${orgId}`, + ); + } + + for (const orgId of migrationOrgs) { + const conn = 
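The migrateAllOrgs helper used by the full-migration test further down also lives outside this hunk; a rough sketch of what it would need to do (re-run collection and index creation per org, report newly created collections, and call a progress callback), assuming this spec's DB_PREFIX constant and its createIndexesWithRetry import:

interface OrgMigrationResultSketch {
  orgId: string;
  totalMs: number;
  newCollections: string[];
  indexResults: Array<{ model: string; ms: number }>;
}

async function migrateAllOrgsSketch(
  base: mongoose.Connection,
  orgIds: string[],
  schemas: Record<string, Schema>,
  onProgress?: (completed: number, total: number, result: OrgMigrationResultSketch) => void,
): Promise<OrgMigrationResultSketch[]> {
  const results: OrgMigrationResultSketch[] = [];
  for (const orgId of orgIds) {
    const orgConn = base.useDb(`${DB_PREFIX}org_${orgId}`, { useCache: true });
    const before = (await orgConn.db!.listCollections().toArray()).map((c) => c.name);
    const t0 = Date.now();
    const indexResults: OrgMigrationResultSketch['indexResults'] = [];
    for (const [name, schema] of Object.entries(schemas)) {
      const model = orgConn.models[name] || orgConn.model(name, schema);
      await model.createCollection(); // effectively a no-op when the collection exists
      const idxStart = Date.now();
      await createIndexesWithRetry(model); // deadlock-safe on FerretDB/DocumentDB
      indexResults.push({ model: name, ms: Date.now() - idxStart });
    }
    const after = (await orgConn.db!.listCollections().toArray()).map((c) => c.name);
    const result: OrgMigrationResultSketch = {
      orgId,
      totalMs: Date.now() - t0,
      newCollections: after.filter((c) => !before.includes(c)),
      indexResults,
    };
    results.push(result);
    onProgress?.(results.length, orgIds.length, result);
  }
  return results;
}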
baseConn.useDb(`${DB_PREFIX}org_${orgId}`, { useCache: true }); + const indexes = await conn.db!.collection('users').indexes(); + const hasNewIdx = indexes.some( + (idx: Record) => JSON.stringify(idx.key) === JSON.stringify(indexSpec), + ); + expect(hasNewIdx).toBe(true); + } + + console.log( + `[Migration] New compound index added to 'users' across ${migrationOrgs.length} orgs`, + ); + }, 60_000); + + it('runs migrateAllOrgs and reports progress', async () => { + const progress: string[] = []; + + const results = await migrateAllOrgs( + baseConn, + migrationOrgs, + MODEL_SCHEMAS, + (completed, total, result) => { + progress.push( + `${completed}/${total}: ${result.orgId} β€” ${result.totalMs}ms, ${result.newCollections.length} new collections`, + ); + }, + ); + + console.log(`[Migration] Full migration across ${migrationOrgs.length} orgs:`); + for (const p of progress) { + console.log(` ${p}`); + } + + const totalMs = results.reduce((s, r) => s + r.totalMs, 0); + const avgMs = Math.round(totalMs / results.length); + console.log(` Total: ${totalMs}ms, avg: ${avgMs}ms/org`); + + expect(results).toHaveLength(migrationOrgs.length); + expect(results.every((r) => r.indexResults.length >= MODEL_COUNT)).toBe(true); + }, 120_000); + + it('verifies existing data is preserved after migration', async () => { + for (const orgId of migrationOrgs) { + const conn = baseConn.useDb(`${DB_PREFIX}org_${orgId}`, { useCache: true }); + const user = await conn.db!.collection('users').findOne({ email: `user@${orgId}.test` }); + expect(user).toBeDefined(); + expect(user!.name).toBe(`User ${orgId}`); + } + + console.log( + `[Migration] All existing user data preserved across ${migrationOrgs.length} orgs`, + ); + }); + }); +}); diff --git a/packages/data-schemas/misc/ferretdb/promptLookup.ferretdb.spec.ts b/packages/data-schemas/misc/ferretdb/promptLookup.ferretdb.spec.ts new file mode 100644 index 0000000000..7e6c8ad1b0 --- /dev/null +++ b/packages/data-schemas/misc/ferretdb/promptLookup.ferretdb.spec.ts @@ -0,0 +1,353 @@ +import mongoose, { Schema, Types } from 'mongoose'; + +/** + * Integration tests for the Prompt $lookup β†’ find + attach replacement. + * + * These verify that prompt group listing with production prompt + * resolution works identically on both MongoDB and FerretDB + * using only standard find/countDocuments (no $lookup). + * + * Run against FerretDB: + * FERRETDB_URI="mongodb://ferretdb:ferretdb@127.0.0.1:27020/prompt_lookup_test" npx jest promptLookup.ferretdb + * + * Run against MongoDB (for parity): + * FERRETDB_URI="mongodb://127.0.0.1:27017/prompt_lookup_test" npx jest promptLookup.ferretdb + */ + +const FERRETDB_URI = process.env.FERRETDB_URI; +const describeIfFerretDB = FERRETDB_URI ? 
describe : describe.skip; + +const promptGroupSchema = new Schema( + { + name: { type: String, required: true, index: true }, + numberOfGenerations: { type: Number, default: 0 }, + oneliner: { type: String, default: '' }, + category: { type: String, default: '', index: true }, + productionId: { type: Schema.Types.ObjectId, ref: 'FDBPrompt', index: true }, + author: { type: Schema.Types.ObjectId, required: true, index: true }, + authorName: { type: String, required: true }, + command: { type: String }, + projectIds: { type: [Schema.Types.ObjectId], default: [] }, + }, + { timestamps: true }, +); + +const promptSchema = new Schema( + { + groupId: { type: Schema.Types.ObjectId, ref: 'FDBPromptGroup', required: true }, + author: { type: Schema.Types.ObjectId, required: true }, + prompt: { type: String, required: true }, + type: { type: String, enum: ['text', 'chat'], required: true }, + }, + { timestamps: true }, +); + +type PromptGroupDoc = mongoose.Document & { + name: string; + productionId: Types.ObjectId; + author: Types.ObjectId; + authorName: string; + category: string; + oneliner: string; + numberOfGenerations: number; + command?: string; + projectIds: Types.ObjectId[]; + createdAt: Date; + updatedAt: Date; +}; + +type PromptDoc = mongoose.Document & { + groupId: Types.ObjectId; + author: Types.ObjectId; + prompt: string; + type: string; +}; + +/** Mirrors the attachProductionPrompts helper from api/models/Prompt.js */ +async function attachProductionPrompts( + groups: Array>, + PromptModel: mongoose.Model, +): Promise>> { + const productionIds = groups.map((g) => g.productionId as Types.ObjectId).filter(Boolean); + + if (productionIds.length === 0) { + return groups.map((g) => ({ ...g, productionPrompt: null })); + } + + const prompts = await PromptModel.find({ _id: { $in: productionIds } }) + .select('prompt') + .lean(); + const promptMap = new Map(prompts.map((p) => [p._id.toString(), p])); + + return groups.map((g) => ({ + ...g, + productionPrompt: g.productionId + ? (promptMap.get((g.productionId as Types.ObjectId).toString()) ?? 
null) + : null, + })); +} + +describeIfFerretDB('Prompt $lookup replacement - FerretDB compatibility', () => { + let PromptGroup: mongoose.Model; + let Prompt: mongoose.Model; + + const authorId = new Types.ObjectId(); + + beforeAll(async () => { + await mongoose.connect(FERRETDB_URI as string); + PromptGroup = + (mongoose.models.FDBPromptGroup as mongoose.Model) || + mongoose.model('FDBPromptGroup', promptGroupSchema); + Prompt = + (mongoose.models.FDBPrompt as mongoose.Model) || + mongoose.model('FDBPrompt', promptSchema); + await PromptGroup.createCollection(); + await Prompt.createCollection(); + }); + + afterAll(async () => { + await mongoose.connection.dropDatabase(); + await mongoose.disconnect(); + }); + + afterEach(async () => { + await PromptGroup.deleteMany({}); + await Prompt.deleteMany({}); + }); + + async function seedGroupWithPrompt( + name: string, + promptText: string, + extra: Record = {}, + ) { + const group = await PromptGroup.create({ + name, + author: authorId, + authorName: 'Test User', + productionId: new Types.ObjectId(), + ...extra, + }); + + const prompt = await Prompt.create({ + groupId: group._id, + author: authorId, + prompt: promptText, + type: 'text', + }); + + await PromptGroup.updateOne({ _id: group._id }, { productionId: prompt._id }); + return { + group: (await PromptGroup.findById(group._id).lean()) as Record, + prompt, + }; + } + + describe('attachProductionPrompts', () => { + it('should attach production prompt text to groups', async () => { + await seedGroupWithPrompt('Group 1', 'Hello {{name}}'); + await seedGroupWithPrompt('Group 2', 'Summarize this: {{text}}'); + + const groups = await PromptGroup.find({}).sort({ name: 1 }).lean(); + const result = await attachProductionPrompts( + groups as Array>, + Prompt, + ); + + expect(result).toHaveLength(2); + expect(result[0].name).toBe('Group 1'); + expect((result[0].productionPrompt as Record).prompt).toBe('Hello {{name}}'); + expect(result[1].name).toBe('Group 2'); + expect((result[1].productionPrompt as Record).prompt).toBe( + 'Summarize this: {{text}}', + ); + }); + + it('should handle groups with no productionId', async () => { + await PromptGroup.create({ + name: 'Empty Group', + author: authorId, + authorName: 'Test User', + productionId: null as unknown as Types.ObjectId, + }); + + const groups = await PromptGroup.find({}).lean(); + const result = await attachProductionPrompts( + groups as Array>, + Prompt, + ); + + expect(result).toHaveLength(1); + expect(result[0].productionPrompt).toBeNull(); + }); + + it('should handle deleted production prompts gracefully', async () => { + await seedGroupWithPrompt('Orphaned', 'old text'); + await Prompt.deleteMany({}); + + const groups = await PromptGroup.find({}).lean(); + const result = await attachProductionPrompts( + groups as Array>, + Prompt, + ); + + expect(result).toHaveLength(1); + expect(result[0].productionPrompt).toBeNull(); + }); + + it('should preserve productionId as the ObjectId (not overwritten)', async () => { + const { prompt } = await seedGroupWithPrompt('Preserved', 'keep id'); + + const groups = await PromptGroup.find({}).lean(); + const result = await attachProductionPrompts( + groups as Array>, + Prompt, + ); + + expect((result[0].productionId as Types.ObjectId).toString()).toBe( + (prompt._id as Types.ObjectId).toString(), + ); + expect((result[0].productionPrompt as Record).prompt).toBe('keep id'); + }); + }); + + describe('paginated query pattern (getPromptGroups replacement)', () => { + it('should return paginated groups with 
production prompts', async () => { + for (let i = 0; i < 5; i++) { + await seedGroupWithPrompt(`Prompt ${i}`, `Content ${i}`); + } + + const query = { author: authorId }; + const skip = 0; + const limit = 3; + + const [groups, total] = await Promise.all([ + PromptGroup.find(query) + .sort({ createdAt: -1 }) + .skip(skip) + .limit(limit) + .select( + 'name numberOfGenerations oneliner category projectIds productionId author authorName createdAt updatedAt', + ) + .lean(), + PromptGroup.countDocuments(query), + ]); + + const result = await attachProductionPrompts( + groups as Array>, + Prompt, + ); + + expect(total).toBe(5); + expect(result).toHaveLength(3); + for (const group of result) { + expect(group.productionPrompt).toBeDefined(); + expect(group.productionPrompt).not.toBeNull(); + } + }); + + it('should correctly compute page count', async () => { + for (let i = 0; i < 7; i++) { + await seedGroupWithPrompt(`Page ${i}`, `Content ${i}`); + } + + const total = await PromptGroup.countDocuments({ author: authorId }); + const pageSize = 3; + const pages = Math.ceil(total / pageSize); + + expect(pages).toBe(3); + }); + }); + + describe('cursor-based pagination pattern (getListPromptGroupsByAccess replacement)', () => { + it('should return groups filtered by accessible IDs with has_more', async () => { + const seeded = []; + for (let i = 0; i < 5; i++) { + const { group } = await seedGroupWithPrompt(`Access ${i}`, `Content ${i}`); + seeded.push(group); + } + + const accessibleIds = seeded.slice(0, 3).map((g) => g._id as Types.ObjectId); + const normalizedLimit = 2; + + const groups = await PromptGroup.find({ _id: { $in: accessibleIds } }) + .sort({ updatedAt: -1, _id: 1 }) + .limit(normalizedLimit + 1) + .select( + 'name numberOfGenerations oneliner category projectIds productionId author authorName createdAt updatedAt', + ) + .lean(); + + const result = await attachProductionPrompts( + groups as Array>, + Prompt, + ); + + const hasMore = result.length > normalizedLimit; + const data = result.slice(0, normalizedLimit); + + expect(hasMore).toBe(true); + expect(data).toHaveLength(2); + for (const group of data) { + expect(group.productionPrompt).not.toBeNull(); + } + }); + + it('should return all groups when no limit is set', async () => { + const seeded = []; + for (let i = 0; i < 4; i++) { + const { group } = await seedGroupWithPrompt(`NoLimit ${i}`, `Content ${i}`); + seeded.push(group); + } + + const accessibleIds = seeded.map((g) => g._id as Types.ObjectId); + const groups = await PromptGroup.find({ _id: { $in: accessibleIds } }) + .sort({ updatedAt: -1, _id: 1 }) + .select( + 'name numberOfGenerations oneliner category projectIds productionId author authorName createdAt updatedAt', + ) + .lean(); + + const result = await attachProductionPrompts( + groups as Array>, + Prompt, + ); + + expect(result).toHaveLength(4); + }); + }); + + describe('output shape matches original $lookup pipeline', () => { + it('should produce the same field structure as the aggregation', async () => { + await seedGroupWithPrompt('Shape Test', 'Check all fields', { + category: 'testing', + oneliner: 'A test prompt', + numberOfGenerations: 5, + }); + + const groups = await PromptGroup.find({}) + .select( + 'name numberOfGenerations oneliner category projectIds productionId author authorName createdAt updatedAt', + ) + .lean(); + const result = await attachProductionPrompts( + groups as Array>, + Prompt, + ); + + const item = result[0]; + expect(item.name).toBe('Shape Test'); + expect(item.numberOfGenerations).toBe(5); 
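For contrast, the aggregation that this find-plus-attach pattern stands in for would look roughly like the following; it runs on stock MongoDB but is rejected by FerretDB v2.x, and the collection name is an assumption derived from this spec's test models rather than the production code:

async function lookupEquivalentSketch(author: Types.ObjectId) {
  return PromptGroup.aggregate([
    { $match: { author } },
    { $sort: { createdAt: -1 } },
    { $skip: 0 },
    { $limit: 10 },
    {
      $lookup: {
        from: 'fdbprompts', // assumed collection name for the FDBPrompt test model
        localField: 'productionId',
        foreignField: '_id',
        as: 'productionPrompt',
      },
    },
    { $unwind: { path: '$productionPrompt', preserveNullAndEmptyArrays: true } },
  ]);
}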
+ expect(item.oneliner).toBe('A test prompt'); + expect(item.category).toBe('testing'); + expect(item.projectIds).toEqual([]); + expect(item.productionId).toBeDefined(); + expect(item.author).toBeDefined(); + expect(item.authorName).toBe('Test User'); + expect(item.createdAt).toBeInstanceOf(Date); + expect(item.updatedAt).toBeInstanceOf(Date); + expect(item.productionPrompt).toBeDefined(); + expect((item.productionPrompt as Record).prompt).toBe('Check all fields'); + expect((item.productionPrompt as Record)._id).toBeDefined(); + }); + }); +}); diff --git a/packages/data-schemas/misc/ferretdb/pullAll.ferretdb.spec.ts b/packages/data-schemas/misc/ferretdb/pullAll.ferretdb.spec.ts new file mode 100644 index 0000000000..446cb701d1 --- /dev/null +++ b/packages/data-schemas/misc/ferretdb/pullAll.ferretdb.spec.ts @@ -0,0 +1,297 @@ +import mongoose, { Schema, Types } from 'mongoose'; + +/** + * Integration tests for $pullAll compatibility with FerretDB. + * + * These tests verify that the $pull β†’ $pullAll migration works + * identically on both MongoDB and FerretDB by running against + * a real database specified via FERRETDB_URI env var. + * + * Run against FerretDB: + * FERRETDB_URI="mongodb://ferretdb:ferretdb@127.0.0.1:27020/pullall_test" npx jest pullAll.ferretdb + * + * Run against MongoDB (for parity): + * FERRETDB_URI="mongodb://127.0.0.1:27017/pullall_test" npx jest pullAll.ferretdb + */ + +const FERRETDB_URI = process.env.FERRETDB_URI; + +const describeIfFerretDB = FERRETDB_URI ? describe : describe.skip; + +const groupSchema = new Schema({ + name: { type: String, required: true }, + memberIds: [{ type: String }], +}); + +const conversationSchema = new Schema({ + conversationId: { type: String, required: true, unique: true }, + user: { type: String }, + tags: { type: [String], default: [] }, +}); + +const projectSchema = new Schema({ + name: { type: String, required: true }, + promptGroupIds: { type: [Schema.Types.ObjectId], default: [] }, + agentIds: { type: [String], default: [] }, +}); + +const agentSchema = new Schema({ + name: { type: String, required: true }, + projectIds: { type: [String], default: [] }, + tool_resources: { type: Schema.Types.Mixed, default: {} }, +}); + +describeIfFerretDB('$pullAll FerretDB compatibility', () => { + let Group: mongoose.Model; + let Conversation: mongoose.Model; + let Project: mongoose.Model; + let Agent: mongoose.Model; + + beforeAll(async () => { + await mongoose.connect(FERRETDB_URI as string); + + Group = mongoose.models.FDBGroup || mongoose.model('FDBGroup', groupSchema); + Conversation = + mongoose.models.FDBConversation || mongoose.model('FDBConversation', conversationSchema); + Project = mongoose.models.FDBProject || mongoose.model('FDBProject', projectSchema); + Agent = mongoose.models.FDBAgent || mongoose.model('FDBAgent', agentSchema); + + await Group.createCollection(); + await Conversation.createCollection(); + await Project.createCollection(); + await Agent.createCollection(); + }); + + afterAll(async () => { + await mongoose.connection.dropDatabase(); + await mongoose.disconnect(); + }); + + afterEach(async () => { + await Group.deleteMany({}); + await Conversation.deleteMany({}); + await Project.deleteMany({}); + await Agent.deleteMany({}); + }); + + describe('scalar $pullAll (single value wrapped in array)', () => { + it('should remove a single memberId from a group', async () => { + const userId = new Types.ObjectId().toString(); + const otherUserId = new Types.ObjectId().toString(); + + await Group.create({ + name: 'Test 
Group', + memberIds: [userId, otherUserId], + }); + + await Group.updateMany({ memberIds: userId }, { $pullAll: { memberIds: [userId] } }); + + const updated = await Group.findOne({ name: 'Test Group' }).lean(); + const doc = updated as Record; + expect(doc.memberIds).toEqual([otherUserId]); + }); + + it('should remove a memberId from multiple groups at once', async () => { + const userId = new Types.ObjectId().toString(); + + await Group.create([ + { name: 'Group A', memberIds: [userId, 'other-1'] }, + { name: 'Group B', memberIds: [userId, 'other-2'] }, + { name: 'Group C', memberIds: ['other-3'] }, + ]); + + await Group.updateMany({ memberIds: userId }, { $pullAll: { memberIds: [userId] } }); + + const groups = await Group.find({}).sort({ name: 1 }).lean(); + const docs = groups as Array>; + expect(docs[0].memberIds).toEqual(['other-1']); + expect(docs[1].memberIds).toEqual(['other-2']); + expect(docs[2].memberIds).toEqual(['other-3']); + }); + + it('should remove a tag from conversations', async () => { + const user = 'user-123'; + const tag = 'important'; + + await Conversation.create([ + { conversationId: 'conv-1', user, tags: [tag, 'other'] }, + { conversationId: 'conv-2', user, tags: [tag] }, + { conversationId: 'conv-3', user, tags: ['other'] }, + ]); + + await Conversation.updateMany({ user, tags: tag }, { $pullAll: { tags: [tag] } }); + + const convos = await Conversation.find({}).sort({ conversationId: 1 }).lean(); + const docs = convos as Array>; + expect(docs[0].tags).toEqual(['other']); + expect(docs[1].tags).toEqual([]); + expect(docs[2].tags).toEqual(['other']); + }); + + it('should remove a single agentId from all projects', async () => { + const agentId = 'agent-to-remove'; + + await Project.create([ + { name: 'Proj A', agentIds: [agentId, 'agent-keep'] }, + { name: 'Proj B', agentIds: ['agent-keep'] }, + ]); + + await Project.updateMany({}, { $pullAll: { agentIds: [agentId] } }); + + const projects = await Project.find({}).sort({ name: 1 }).lean(); + const docs = projects as Array>; + expect(docs[0].agentIds).toEqual(['agent-keep']); + expect(docs[1].agentIds).toEqual(['agent-keep']); + }); + + it('should be a no-op when the value does not exist in the array', async () => { + await Group.create({ name: 'Stable Group', memberIds: ['a', 'b'] }); + + await Group.updateMany( + { memberIds: 'nonexistent' }, + { $pullAll: { memberIds: ['nonexistent'] } }, + ); + + const group = await Group.findOne({ name: 'Stable Group' }).lean(); + const doc = group as Record; + expect(doc.memberIds).toEqual(['a', 'b']); + }); + }); + + describe('multi-value $pullAll (replacing $pull + $in)', () => { + it('should remove multiple promptGroupIds from a project', async () => { + const ids = [new Types.ObjectId(), new Types.ObjectId(), new Types.ObjectId()]; + + await Project.create({ + name: 'Test Project', + promptGroupIds: ids, + }); + + const toRemove = [ids[0], ids[2]]; + await Project.findOneAndUpdate( + { name: 'Test Project' }, + { $pullAll: { promptGroupIds: toRemove } }, + { new: true }, + ); + + const updated = await Project.findOne({ name: 'Test Project' }).lean(); + const doc = updated as Record; + const remaining = (doc.promptGroupIds as Types.ObjectId[]).map((id) => id.toString()); + expect(remaining).toEqual([ids[1].toString()]); + }); + + it('should remove multiple agentIds from a project', async () => { + await Project.create({ + name: 'Agent Project', + agentIds: ['a1', 'a2', 'a3', 'a4'], + }); + + await Project.findOneAndUpdate( + { name: 'Agent Project' }, + { $pullAll: { 
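These updates are the exact-match counterparts of the $pull forms they replace: a scalar $pull condition maps to a one-element $pullAll array, and $pull with an $in condition over scalars maps to $pullAll with the same values. A tiny helper sketch (not part of this patch) that makes the mapping explicit:

/** Build the exact-match $pullAll equivalent of a scalar $pull update. */
function toPullAllSketch(field: string, values: unknown | unknown[]) {
  return { $pullAll: { [field]: Array.isArray(values) ? values : [values] } };
}

// toPullAllSketch('memberIds', userId)      -> { $pullAll: { memberIds: [userId] } }
// toPullAllSketch('agentIds', ['a1', 'a3']) -> { $pullAll: { agentIds: ['a1', 'a3'] } }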
agentIds: ['a1', 'a3'] } }, + { new: true }, + ); + + const updated = await Project.findOne({ name: 'Agent Project' }).lean(); + const doc = updated as Record; + expect(doc.agentIds).toEqual(['a2', 'a4']); + }); + + it('should remove projectIds from an agent', async () => { + await Agent.create({ + name: 'Test Agent', + projectIds: ['p1', 'p2', 'p3'], + }); + + await Agent.findOneAndUpdate( + { name: 'Test Agent' }, + { $pullAll: { projectIds: ['p1', 'p3'] } }, + { new: true }, + ); + + const updated = await Agent.findOne({ name: 'Test Agent' }).lean(); + const doc = updated as Record; + expect(doc.projectIds).toEqual(['p2']); + }); + + it('should handle removing from nested dynamic paths (tool_resources)', async () => { + await Agent.create({ + name: 'Resource Agent', + tool_resources: { + code_interpreter: { file_ids: ['f1', 'f2', 'f3'] }, + file_search: { file_ids: ['f4', 'f5'] }, + }, + }); + + const pullAllOps: Record = {}; + const filesByResource = { + code_interpreter: ['f1', 'f3'], + file_search: ['f5'], + }; + + for (const [resource, fileIds] of Object.entries(filesByResource)) { + pullAllOps[`tool_resources.${resource}.file_ids`] = fileIds; + } + + await Agent.findOneAndUpdate( + { name: 'Resource Agent' }, + { $pullAll: pullAllOps }, + { new: true }, + ); + + const updated = await Agent.findOne({ name: 'Resource Agent' }).lean(); + const doc = updated as unknown as Record; + expect(doc.tool_resources.code_interpreter.file_ids).toEqual(['f2']); + expect(doc.tool_resources.file_search.file_ids).toEqual(['f4']); + }); + + it('should handle empty array (no-op)', async () => { + await Project.create({ + name: 'Unchanged', + agentIds: ['a1', 'a2'], + }); + + await Project.findOneAndUpdate( + { name: 'Unchanged' }, + { $pullAll: { agentIds: [] } }, + { new: true }, + ); + + const updated = await Project.findOne({ name: 'Unchanged' }).lean(); + const doc = updated as Record; + expect(doc.agentIds).toEqual(['a1', 'a2']); + }); + + it('should handle values not present in the array', async () => { + await Project.create({ + name: 'Partial', + agentIds: ['a1', 'a2'], + }); + + await Project.findOneAndUpdate( + { name: 'Partial' }, + { $pullAll: { agentIds: ['a1', 'nonexistent'] } }, + { new: true }, + ); + + const updated = await Project.findOne({ name: 'Partial' }).lean(); + const doc = updated as Record; + expect(doc.agentIds).toEqual(['a2']); + }); + }); + + describe('duplicate handling', () => { + it('should remove all occurrences of a duplicated value', async () => { + await Group.create({ + name: 'Dupes Group', + memberIds: ['a', 'b', 'a', 'c', 'a'], + }); + + await Group.updateMany({ name: 'Dupes Group' }, { $pullAll: { memberIds: ['a'] } }); + + const updated = await Group.findOne({ name: 'Dupes Group' }).lean(); + const doc = updated as Record; + expect(doc.memberIds).toEqual(['b', 'c']); + }); + }); +}); diff --git a/packages/data-schemas/misc/ferretdb/pullSubdocument.ferretdb.spec.ts b/packages/data-schemas/misc/ferretdb/pullSubdocument.ferretdb.spec.ts new file mode 100644 index 0000000000..6a7651b055 --- /dev/null +++ b/packages/data-schemas/misc/ferretdb/pullSubdocument.ferretdb.spec.ts @@ -0,0 +1,199 @@ +import mongoose, { Schema } from 'mongoose'; + +/** + * Integration tests to verify whether $pull with condition objects + * works on FerretDB v2.x. The v1.24 docs listed $pull as supported, + * but the v2.x array update operator docs only list $push, $addToSet, + * $pop, and $pullAll. + * + * This test covers the 3 patterns used in api/models/Agent.js: + * 1. 
$pull { edges: { to: id } } -- simple condition object + * 2. $pull { favorites: { agentId: id } } -- single scalar match + * 3. $pull { favorites: { agentId: { $in: [...] } } } -- $in condition + * + * Run against FerretDB: + * FERRETDB_URI="mongodb://ferretdb:ferretdb@127.0.0.1:27020/pull_subdoc_test" npx jest pullSubdocument.ferretdb + * + * Run against MongoDB (for parity): + * FERRETDB_URI="mongodb://127.0.0.1:27017/pull_subdoc_test" npx jest pullSubdocument.ferretdb + */ + +const FERRETDB_URI = process.env.FERRETDB_URI; +const describeIfFerretDB = FERRETDB_URI ? describe : describe.skip; + +const agentSchema = new Schema({ + name: { type: String, required: true }, + edges: { type: [Schema.Types.Mixed], default: [] }, +}); + +const userSchema = new Schema({ + name: { type: String, required: true }, + favorites: { + type: [ + { + _id: false, + agentId: String, + model: String, + endpoint: String, + }, + ], + default: [], + }, +}); + +type AgentDoc = mongoose.InferSchemaType; +type UserDoc = mongoose.InferSchemaType; + +describeIfFerretDB('$pull with condition objects - FerretDB v2 verification', () => { + let Agent: mongoose.Model; + let User: mongoose.Model; + + beforeAll(async () => { + await mongoose.connect(FERRETDB_URI as string); + Agent = mongoose.model('TestPullAgent', agentSchema); + User = mongoose.model('TestPullUser', userSchema); + }); + + afterAll(async () => { + await mongoose.connection.db?.dropDatabase(); + await mongoose.disconnect(); + }); + + beforeEach(async () => { + await Agent.deleteMany({}); + await User.deleteMany({}); + }); + + describe('Pattern 1: $pull { edges: { to: id } }', () => { + it('should remove edge subdocuments matching a condition', async () => { + await Agent.create({ + name: 'Agent A', + edges: [ + { from: 'a', to: 'b', edgeType: 'handoff' }, + { from: 'a', to: 'c', edgeType: 'direct' }, + { from: 'a', to: 'b', edgeType: 'direct' }, + ], + }); + + await Agent.updateMany({ 'edges.to': 'b' }, { $pull: { edges: { to: 'b' } } }); + + const result = await Agent.findOne({ name: 'Agent A' }).lean(); + expect(result?.edges).toHaveLength(1); + expect((result?.edges[0] as Record).to).toBe('c'); + }); + + it('should not affect agents without matching edges', async () => { + await Agent.create({ + name: 'Agent B', + edges: [{ from: 'x', to: 'y' }], + }); + + await Agent.updateMany({ 'edges.to': 'z' }, { $pull: { edges: { to: 'z' } } }); + + const result = await Agent.findOne({ name: 'Agent B' }).lean(); + expect(result?.edges).toHaveLength(1); + }); + }); + + describe('Pattern 2: $pull { favorites: { agentId: id } }', () => { + it('should remove favorite subdocuments matching agentId', async () => { + await User.create({ + name: 'User 1', + favorites: [ + { agentId: 'agent_1' }, + { agentId: 'agent_2' }, + { model: 'gpt-4', endpoint: 'openAI' }, + ], + }); + + await User.updateMany( + { 'favorites.agentId': 'agent_1' }, + { $pull: { favorites: { agentId: 'agent_1' } } }, + ); + + const result = await User.findOne({ name: 'User 1' }).lean(); + expect(result?.favorites).toHaveLength(2); + + const agentIds = result?.favorites.map((f) => f.agentId).filter(Boolean); + expect(agentIds).toEqual(['agent_2']); + }); + + it('should remove from multiple users at once', async () => { + await User.create([ + { + name: 'User A', + favorites: [{ agentId: 'target' }, { agentId: 'keep' }], + }, + { + name: 'User B', + favorites: [{ agentId: 'target' }], + }, + { + name: 'User C', + favorites: [{ agentId: 'keep' }], + }, + ]); + + await User.updateMany( + { 
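These are the cases $pullAll cannot take over: it removes array elements that are exactly equal to the supplied values, whereas the agent and favorites paths need to match subdocuments by a single field. A minimal illustration using the same field values as this spec (update documents only, not part of this patch):

// Exact-equality form: only removes elements that are literally { agentId: 'agent_1' },
// so it would miss richer favorites such as { agentId: 'agent_1', model: 'gpt-4' }.
const exactMatchOnly = { $pullAll: { favorites: [{ agentId: 'agent_1' }] } };

// Condition form verified in this spec: removes every favorite whose agentId matches,
// regardless of any other fields on the subdocument.
const byCondition = { $pull: { favorites: { agentId: 'agent_1' } } };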
'favorites.agentId': 'target' }, + { $pull: { favorites: { agentId: 'target' } } }, + ); + + const users = await User.find({}).sort({ name: 1 }).lean(); + expect(users[0].favorites).toHaveLength(1); + expect(users[0].favorites[0].agentId).toBe('keep'); + expect(users[1].favorites).toHaveLength(0); + expect(users[2].favorites).toHaveLength(1); + expect(users[2].favorites[0].agentId).toBe('keep'); + }); + }); + + describe('Pattern 3: $pull { favorites: { agentId: { $in: [...] } } }', () => { + it('should remove favorites matching any agentId in the array', async () => { + await User.create({ + name: 'Bulk User', + favorites: [ + { agentId: 'a1' }, + { agentId: 'a2' }, + { agentId: 'a3' }, + { model: 'gpt-4', endpoint: 'openAI' }, + ], + }); + + await User.updateMany( + { 'favorites.agentId': { $in: ['a1', 'a3'] } }, + { $pull: { favorites: { agentId: { $in: ['a1', 'a3'] } } } }, + ); + + const result = await User.findOne({ name: 'Bulk User' }).lean(); + expect(result?.favorites).toHaveLength(2); + + const agentIds = result?.favorites.map((f) => f.agentId).filter(Boolean); + expect(agentIds).toEqual(['a2']); + }); + + it('should work across multiple users with $in', async () => { + await User.create([ + { + name: 'Multi A', + favorites: [{ agentId: 'x' }, { agentId: 'y' }, { agentId: 'z' }], + }, + { + name: 'Multi B', + favorites: [{ agentId: 'x' }, { agentId: 'z' }], + }, + ]); + + await User.updateMany( + { 'favorites.agentId': { $in: ['x', 'y'] } }, + { $pull: { favorites: { agentId: { $in: ['x', 'y'] } } } }, + ); + + const users = await User.find({}).sort({ name: 1 }).lean(); + expect(users[0].favorites).toHaveLength(1); + expect(users[0].favorites[0].agentId).toBe('z'); + expect(users[1].favorites).toHaveLength(1); + expect(users[1].favorites[0].agentId).toBe('z'); + }); + }); +}); diff --git a/packages/data-schemas/misc/ferretdb/randomPrompts.ferretdb.spec.ts b/packages/data-schemas/misc/ferretdb/randomPrompts.ferretdb.spec.ts new file mode 100644 index 0000000000..ccc274d7fc --- /dev/null +++ b/packages/data-schemas/misc/ferretdb/randomPrompts.ferretdb.spec.ts @@ -0,0 +1,210 @@ +import mongoose, { Schema, Types } from 'mongoose'; + +/** + * Integration tests for $sample β†’ app-level shuffle replacement. + * + * The original getRandomPromptGroups used a $sample aggregation stage + * (unsupported by FerretDB). It was replaced with: + * 1. PromptGroup.distinct('category', { category: { $ne: '' } }) + * 2. Fisher-Yates shuffle of the categories array + * 3. PromptGroup.find({ category: { $in: selectedCategories } }) + * 4. Deduplicate (one group per category) and order by shuffled categories + * + * Run against FerretDB: + * FERRETDB_URI="mongodb://ferretdb:ferretdb@127.0.0.1:27020/random_prompts_test" npx jest randomPrompts.ferretdb + * + * Run against MongoDB (for parity): + * FERRETDB_URI="mongodb://127.0.0.1:27017/random_prompts_test" npx jest randomPrompts.ferretdb + */ + +const FERRETDB_URI = process.env.FERRETDB_URI; + +const describeIfFerretDB = FERRETDB_URI ? 
describe : describe.skip; + +const promptGroupSchema = new Schema({ + name: { type: String, required: true }, + category: { type: String, default: '' }, + author: { type: Schema.Types.ObjectId, required: true }, + authorName: { type: String, default: '' }, +}); + +/** Reproduces the refactored getRandomPromptGroups logic */ +async function getRandomPromptGroups( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + PromptGroup: mongoose.Model, + filter: { limit: number; skip: number }, +) { + const categories: string[] = await PromptGroup.distinct('category', { category: { $ne: '' } }); + + for (let i = categories.length - 1; i > 0; i--) { + const j = Math.floor(Math.random() * (i + 1)); + [categories[i], categories[j]] = [categories[j], categories[i]]; + } + + const skip = +filter.skip; + const limit = +filter.limit; + const selectedCategories = categories.slice(skip, skip + limit); + + if (selectedCategories.length === 0) { + return { prompts: [] }; + } + + const groups = await PromptGroup.find({ category: { $in: selectedCategories } }).lean(); + + const groupByCategory = new Map(); + for (const group of groups) { + const cat = (group as Record).category; + if (!groupByCategory.has(cat)) { + groupByCategory.set(cat, group); + } + } + + const prompts = selectedCategories.map((cat: string) => groupByCategory.get(cat)).filter(Boolean); + + return { prompts }; +} + +describeIfFerretDB('Random prompts $sample replacement - FerretDB compatibility', () => { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + let PromptGroup: mongoose.Model; + const authorId = new Types.ObjectId(); + + beforeAll(async () => { + await mongoose.connect(FERRETDB_URI as string); + PromptGroup = mongoose.model('TestRandPromptGroup', promptGroupSchema); + }); + + afterAll(async () => { + await mongoose.connection.db?.dropDatabase(); + await mongoose.disconnect(); + }); + + beforeEach(async () => { + await PromptGroup.deleteMany({}); + }); + + describe('distinct categories + $in query', () => { + it('should return one group per category', async () => { + await PromptGroup.insertMany([ + { name: 'Code A', category: 'code', author: authorId, authorName: 'User' }, + { name: 'Code B', category: 'code', author: authorId, authorName: 'User' }, + { name: 'Write A', category: 'writing', author: authorId, authorName: 'User' }, + { name: 'Write B', category: 'writing', author: authorId, authorName: 'User' }, + { name: 'Math A', category: 'math', author: authorId, authorName: 'User' }, + ]); + + const result = await getRandomPromptGroups(PromptGroup, { limit: 10, skip: 0 }); + expect(result.prompts).toHaveLength(3); + + const categories = result.prompts.map((p: Record) => p.category).sort(); + expect(categories).toEqual(['code', 'math', 'writing']); + }); + + it('should exclude groups with empty category', async () => { + await PromptGroup.insertMany([ + { name: 'Has Category', category: 'code', author: authorId, authorName: 'User' }, + { name: 'Empty Category', category: '', author: authorId, authorName: 'User' }, + ]); + + const result = await getRandomPromptGroups(PromptGroup, { limit: 10, skip: 0 }); + expect(result.prompts).toHaveLength(1); + expect((result.prompts[0] as Record).name).toBe('Has Category'); + }); + + it('should return empty array when no groups have categories', async () => { + await PromptGroup.insertMany([ + { name: 'No Cat 1', category: '', author: authorId, authorName: 'User' }, + { name: 'No Cat 2', category: '', author: authorId, authorName: 'User' }, + ]); + + const result = 
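For contrast, the single-pipeline approach this helper replaces would look roughly like the following on stock MongoDB; the exact original stages are not shown in this patch, so the grouping step here is an assumption:

// Rejected by FerretDB v2.x because of $sample.
const sampleEquivalentPipeline = [
  { $match: { category: { $ne: '' } } },
  { $group: { _id: '$category', doc: { $first: '$$ROOT' } } }, // one document per category
  { $replaceRoot: { newRoot: '$doc' } },
  { $sample: { size: 10 } }, // random selection, replaced by the Fisher-Yates shuffle above
];
// On MongoDB: await PromptGroup.aggregate(sampleEquivalentPipeline);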
await getRandomPromptGroups(PromptGroup, { limit: 10, skip: 0 }); + expect(result.prompts).toHaveLength(0); + }); + + it('should return empty array when collection is empty', async () => { + const result = await getRandomPromptGroups(PromptGroup, { limit: 10, skip: 0 }); + expect(result.prompts).toHaveLength(0); + }); + }); + + describe('pagination (skip + limit)', () => { + it('should respect limit', async () => { + await PromptGroup.insertMany([ + { name: 'A', category: 'cat1', author: authorId, authorName: 'User' }, + { name: 'B', category: 'cat2', author: authorId, authorName: 'User' }, + { name: 'C', category: 'cat3', author: authorId, authorName: 'User' }, + { name: 'D', category: 'cat4', author: authorId, authorName: 'User' }, + { name: 'E', category: 'cat5', author: authorId, authorName: 'User' }, + ]); + + const result = await getRandomPromptGroups(PromptGroup, { limit: 3, skip: 0 }); + expect(result.prompts).toHaveLength(3); + }); + + it('should respect skip', async () => { + await PromptGroup.insertMany([ + { name: 'A', category: 'cat1', author: authorId, authorName: 'User' }, + { name: 'B', category: 'cat2', author: authorId, authorName: 'User' }, + { name: 'C', category: 'cat3', author: authorId, authorName: 'User' }, + { name: 'D', category: 'cat4', author: authorId, authorName: 'User' }, + ]); + + const result = await getRandomPromptGroups(PromptGroup, { limit: 10, skip: 2 }); + expect(result.prompts).toHaveLength(2); + }); + + it('should return empty when skip exceeds total categories', async () => { + await PromptGroup.insertMany([ + { name: 'A', category: 'cat1', author: authorId, authorName: 'User' }, + { name: 'B', category: 'cat2', author: authorId, authorName: 'User' }, + ]); + + const result = await getRandomPromptGroups(PromptGroup, { limit: 10, skip: 5 }); + expect(result.prompts).toHaveLength(0); + }); + }); + + describe('randomness', () => { + it('should produce varying orderings across multiple calls', async () => { + const categories = Array.from({ length: 10 }, (_, i) => `cat_${i}`); + await PromptGroup.insertMany( + categories.map((cat) => ({ + name: cat, + category: cat, + author: authorId, + authorName: 'User', + })), + ); + + const orderings = new Set(); + for (let i = 0; i < 20; i++) { + const result = await getRandomPromptGroups(PromptGroup, { limit: 10, skip: 0 }); + const order = result.prompts.map((p: Record) => p.category).join(','); + orderings.add(order); + } + + expect(orderings.size).toBeGreaterThan(1); + }); + }); + + describe('deduplication correctness', () => { + it('should return exactly one group per category even with many duplicates', async () => { + const docs = []; + for (let i = 0; i < 50; i++) { + docs.push({ + name: `Group ${i}`, + category: `cat_${i % 5}`, + author: authorId, + authorName: 'User', + }); + } + await PromptGroup.insertMany(docs); + + const result = await getRandomPromptGroups(PromptGroup, { limit: 10, skip: 0 }); + expect(result.prompts).toHaveLength(5); + + const categories = result.prompts.map((p: Record) => p.category).sort(); + expect(categories).toEqual(['cat_0', 'cat_1', 'cat_2', 'cat_3', 'cat_4']); + }); + }); +}); diff --git a/packages/data-schemas/misc/ferretdb/sharding.ferretdb.spec.ts b/packages/data-schemas/misc/ferretdb/sharding.ferretdb.spec.ts new file mode 100644 index 0000000000..e27e0bbe09 --- /dev/null +++ b/packages/data-schemas/misc/ferretdb/sharding.ferretdb.spec.ts @@ -0,0 +1,522 @@ +import mongoose, { Schema, type Connection, type Model } from 'mongoose'; +import { + actionSchema, + agentSchema, 
+ agentApiKeySchema, + agentCategorySchema, + assistantSchema, + balanceSchema, + bannerSchema, + conversationTagSchema, + convoSchema, + fileSchema, + keySchema, + messageSchema, + pluginAuthSchema, + presetSchema, + projectSchema, + promptSchema, + promptGroupSchema, + roleSchema, + sessionSchema, + shareSchema, + tokenSchema, + toolCallSchema, + transactionSchema, + userSchema, + memorySchema, + groupSchema, +} from '~/schema'; +import accessRoleSchema from '~/schema/accessRole'; +import aclEntrySchema from '~/schema/aclEntry'; +import mcpServerSchema from '~/schema/mcpServer'; + +/** + * Sharding PoC β€” self-contained proof-of-concept that exercises: + * 1. Multi-pool connection management via mongoose.createConnection() + * 2. Persistent orgβ†’pool assignment table with capacity limits + * 3. Lazy per-org model registration using all 29 LibreChat schemas + * 4. Cross-pool data isolation + * 5. Routing overhead measurement + * 6. Capacity overflow handling + * + * Both "pools" point to the same FerretDB for the PoC. + * In production each pool URI would be a separate FerretDB+Postgres pair. + * + * Run: + * FERRETDB_URI="mongodb://ferretdb:ferretdb@127.0.0.1:27020/shard_poc" \ + * npx jest sharding.ferretdb --testTimeout=120000 + */ + +const FERRETDB_URI = process.env.FERRETDB_URI; +const describeIfFerretDB = FERRETDB_URI ? describe : describe.skip; + +const DB_PREFIX = 'shard_poc_'; + +// ─── TYPES ────────────────────────────────────────────────────────────────── + +interface PoolConfig { + id: string; + uri: string; + maxOrgs: number; +} + +interface PoolStats { + orgCount: number; + maxOrgs: number; + available: number; +} + +// ─── ALL 29 LIBRECHAT SCHEMAS ─────────────────────────────────────────────── + +const MODEL_SCHEMAS: Record = { + User: userSchema, + Token: tokenSchema, + Session: sessionSchema, + Balance: balanceSchema, + Conversation: convoSchema, + Message: messageSchema, + Agent: agentSchema, + AgentApiKey: agentApiKeySchema, + AgentCategory: agentCategorySchema, + MCPServer: mcpServerSchema, + Role: roleSchema, + Action: actionSchema, + Assistant: assistantSchema, + File: fileSchema, + Banner: bannerSchema, + Project: projectSchema, + Key: keySchema, + PluginAuth: pluginAuthSchema, + Transaction: transactionSchema, + Preset: presetSchema, + Prompt: promptSchema, + PromptGroup: promptGroupSchema, + ConversationTag: conversationTagSchema, + SharedLink: shareSchema, + ToolCall: toolCallSchema, + MemoryEntry: memorySchema, + AccessRole: accessRoleSchema, + AclEntry: aclEntrySchema, + Group: groupSchema, +}; + +const MODEL_COUNT = Object.keys(MODEL_SCHEMAS).length; + +// ─── TENANT ROUTER (INLINE POC) ──────────────────────────────────────────── + +const assignmentSchema = new Schema({ + orgId: { type: String, required: true, unique: true, index: true }, + poolId: { type: String, required: true, index: true }, + createdAt: { type: Date, default: Date.now }, +}); + +class TenantRouter { + private pools: PoolConfig[] = []; + private poolConns = new Map(); + private orgConns = new Map(); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + private orgModels = new Map>>(); + private assignmentCache = new Map(); + private controlConn!: Connection; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + private Assignment!: Model; + + async initialize(pools: PoolConfig[], controlUri: string): Promise { + this.pools = pools; + + this.controlConn = await mongoose.createConnection(controlUri).asPromise(); + this.Assignment = 
this.controlConn.model('OrgAssignment', assignmentSchema); + await this.Assignment.createCollection(); + await this.Assignment.createIndexes(); + + for (const pool of pools) { + const conn = await mongoose.createConnection(pool.uri).asPromise(); + this.poolConns.set(pool.id, conn); + } + } + + /** Resolve orgId β†’ Mongoose Connection for that org's database */ + async getOrgConnection(orgId: string): Promise { + const cached = this.orgConns.get(orgId); + if (cached) { + return cached; + } + + const poolId = await this.resolvePool(orgId); + const poolConn = this.poolConns.get(poolId); + if (!poolConn) { + throw new Error(`Pool ${poolId} not configured`); + } + + const orgConn = poolConn.useDb(`${DB_PREFIX}org_${orgId}`, { useCache: true }); + this.orgConns.set(orgId, orgConn); + return orgConn; + } + + /** Get all 29 models registered on an org's connection (lazy) */ + async getOrgModels(orgId: string): Promise>> { + const cached = this.orgModels.get(orgId); + if (cached) { + return cached; + } + + const conn = await this.getOrgConnection(orgId); + const models: Record> = {}; + for (const [name, schema] of Object.entries(MODEL_SCHEMAS)) { + models[name] = conn.models[name] || conn.model(name, schema); + } + this.orgModels.set(orgId, models); + return models; + } + + /** Convenience: get a single model for an org */ + // eslint-disable-next-line @typescript-eslint/no-explicit-any + async getModel(orgId: string, modelName: string): Promise> { + const models = await this.getOrgModels(orgId); + const model = models[modelName]; + if (!model) { + throw new Error(`Unknown model: ${modelName}`); + } + return model; + } + + /** Provision a new org: create all collections + indexes (with deadlock retry) */ + async initializeOrg(orgId: string): Promise { + const models = await this.getOrgModels(orgId); + const t0 = Date.now(); + for (const model of Object.values(models)) { + await model.createCollection(); + for (let attempt = 0; attempt < 3; attempt++) { + try { + await model.createIndexes(); + break; + } catch (err: unknown) { + const msg = (err as Error).message || ''; + if (msg.includes('deadlock') && attempt < 2) { + await new Promise((r) => setTimeout(r, 50 * (attempt + 1))); + continue; + } + throw err; + } + } + } + return Date.now() - t0; + } + + /** Assign org to a pool with capacity, or return existing assignment */ + async assignOrg(orgId: string): Promise { + const cached = this.assignmentCache.get(orgId); + if (cached) { + return cached; + } + + const existing = (await this.Assignment.findOne({ orgId }).lean()) as Record< + string, + unknown + > | null; + if (existing) { + const poolId = existing.poolId as string; + this.assignmentCache.set(orgId, poolId); + return poolId; + } + + const poolId = await this.selectPoolWithCapacity(); + + try { + await this.Assignment.create({ orgId, poolId }); + } catch (err: unknown) { + if ((err as Record).code === 11000) { + const doc = (await this.Assignment.findOne({ orgId }).lean()) as Record; + const existingPoolId = doc.poolId as string; + this.assignmentCache.set(orgId, existingPoolId); + return existingPoolId; + } + throw err; + } + + this.assignmentCache.set(orgId, poolId); + return poolId; + } + + /** Get per-pool statistics */ + async getPoolStats(): Promise> { + const stats: Record = {}; + for (const pool of this.pools) { + const orgCount = await this.Assignment.countDocuments({ poolId: pool.id }); + stats[pool.id] = { + orgCount, + maxOrgs: pool.maxOrgs, + available: pool.maxOrgs - orgCount, + }; + } + return stats; + } + + /** Which pool 
is an org on? (for test assertions) */ + getAssignment(orgId: string): string | undefined { + return this.assignmentCache.get(orgId); + } + + /** Drop all org databases and the control database */ + async destroyAll(): Promise { + const assignments = (await this.Assignment.find({}).lean()) as Array>; + + for (const a of assignments) { + const orgId = a.orgId as string; + const conn = this.orgConns.get(orgId); + if (conn) { + try { + await conn.dropDatabase(); + } catch { + /* best-effort */ + } + } + } + + try { + await this.controlConn.dropDatabase(); + } catch { + /* best-effort */ + } + } + + async shutdown(): Promise { + for (const conn of this.poolConns.values()) { + await conn.close(); + } + await this.controlConn.close(); + } + + private async resolvePool(orgId: string): Promise { + return this.assignOrg(orgId); + } + + private async selectPoolWithCapacity(): Promise { + for (const pool of this.pools) { + const count = await this.Assignment.countDocuments({ poolId: pool.id }); + if (count < pool.maxOrgs) { + return pool.id; + } + } + throw new Error('All pools at capacity. Add a new pool.'); + } +} + +// ─── TESTS ────────────────────────────────────────────────────────────────── + +describeIfFerretDB('Sharding PoC', () => { + let router: TenantRouter; + + const POOL_A = 'pool-a'; + const POOL_B = 'pool-b'; + const MAX_PER_POOL = 5; + + beforeAll(async () => { + router = new TenantRouter(); + + await router.initialize( + [ + { id: POOL_A, uri: FERRETDB_URI as string, maxOrgs: MAX_PER_POOL }, + { id: POOL_B, uri: FERRETDB_URI as string, maxOrgs: MAX_PER_POOL }, + ], + FERRETDB_URI as string, + ); + }, 30_000); + + afterAll(async () => { + await router.destroyAll(); + await router.shutdown(); + }, 120_000); + + describe('pool assignment and capacity', () => { + it('assigns first 5 orgs to pool A', async () => { + for (let i = 1; i <= 5; i++) { + const poolId = await router.assignOrg(`org_${i}`); + expect(poolId).toBe(POOL_A); + } + + const stats = await router.getPoolStats(); + expect(stats[POOL_A].orgCount).toBe(5); + expect(stats[POOL_A].available).toBe(0); + expect(stats[POOL_B].orgCount).toBe(0); + }); + + it('spills orgs 6-10 to pool B when pool A is full', async () => { + for (let i = 6; i <= 10; i++) { + const poolId = await router.assignOrg(`org_${i}`); + expect(poolId).toBe(POOL_B); + } + + const stats = await router.getPoolStats(); + expect(stats[POOL_A].orgCount).toBe(5); + expect(stats[POOL_B].orgCount).toBe(5); + }); + + it('throws when all pools are at capacity', async () => { + await expect(router.assignOrg('org_overflow')).rejects.toThrow('All pools at capacity'); + }); + + it('returns existing assignment on duplicate call (idempotent)', async () => { + const first = await router.assignOrg('org_1'); + const second = await router.assignOrg('org_1'); + expect(first).toBe(second); + expect(first).toBe(POOL_A); + }); + }); + + describe('org initialization and model registration', () => { + it('initializes an org with all 29 collections and indexes', async () => { + const ms = await router.initializeOrg('org_1'); + console.log(`[Sharding] org_1 init: ${ms}ms (29 collections + 98 indexes)`); + expect(ms).toBeGreaterThan(0); + }, 60_000); + + it('registers all 29 models lazily on the org connection', async () => { + const models = await router.getOrgModels('org_1'); + expect(Object.keys(models)).toHaveLength(MODEL_COUNT); + + for (const name of Object.keys(MODEL_SCHEMAS)) { + expect(models[name]).toBeDefined(); + expect(models[name].modelName).toBe(name); + } + }); + + 
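In a real deployment the router would presumably sit behind a request-scoped accessor rather than being called directly from handlers; a minimal, framework-agnostic middleware sketch (the x-org-id header and the wiring below are assumptions for illustration, not part of this patch):

type OrgScopedRequestSketch = {
  headers: Record<string, string | undefined>;
  orgId?: string;
  getModel?: (modelName: string) => ReturnType<TenantRouter['getModel']>;
};

function orgScopeMiddlewareSketch(router: TenantRouter) {
  return async (req: OrgScopedRequestSketch, _res: unknown, next: (err?: Error) => void) => {
    const orgId = req.headers['x-org-id'];
    if (!orgId) {
      return next(new Error('Missing org identifier'));
    }
    // Stash a request-scoped getter so handlers never touch pools or connections directly.
    req.orgId = orgId;
    req.getModel = (modelName) => router.getModel(orgId, modelName);
    next();
  };
}

// Handler usage: const User = await req.getModel!('User'); await User.findOne({ email }).lean();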
it('initializes a second org on pool B', async () => { + const ms = await router.initializeOrg('org_6'); + console.log(`[Sharding] org_6 init: ${ms}ms (pool B)`); + + expect(router.getAssignment('org_1')).toBe(POOL_A); + expect(router.getAssignment('org_6')).toBe(POOL_B); + }, 60_000); + }); + + describe('cross-pool data isolation', () => { + it('inserts data in org_1 (pool A) β€” invisible from org_6 (pool B)', async () => { + const User1 = await router.getModel('org_1', 'User'); + const User6 = await router.getModel('org_6', 'User'); + + await User1.create({ name: 'Alice', email: 'alice@org1.test', username: 'alice1' }); + await User6.create({ name: 'Bob', email: 'bob@org6.test', username: 'bob6' }); + + const org1Users = await User1.find({}).lean(); + const org6Users = await User6.find({}).lean(); + + expect(org1Users).toHaveLength(1); + expect(org6Users).toHaveLength(1); + expect((org1Users[0] as Record).name).toBe('Alice'); + expect((org6Users[0] as Record).name).toBe('Bob'); + }); + + it('runs queries across orgs on different pools concurrently', async () => { + const Message1 = await router.getModel('org_1', 'Message'); + const Message6 = await router.getModel('org_6', 'Message'); + + await Promise.all([ + Message1.create({ + messageId: 'msg_a1', + conversationId: 'conv_a1', + user: 'user_org1', + sender: 'user', + text: 'hello from org 1', + isCreatedByUser: true, + }), + Message6.create({ + messageId: 'msg_b1', + conversationId: 'conv_b1', + user: 'user_org6', + sender: 'user', + text: 'hello from org 6', + isCreatedByUser: true, + }), + ]); + + const [m1, m6] = await Promise.all([ + Message1.findOne({ messageId: 'msg_a1' }).lean(), + Message6.findOne({ messageId: 'msg_b1' }).lean(), + ]); + + expect((m1 as Record).text).toBe('hello from org 1'); + expect((m6 as Record).text).toBe('hello from org 6'); + }); + }); + + describe('routing performance', () => { + it('measures cache-hit vs cold routing latency', async () => { + const iterations = 100; + + const coldStart = process.hrtime.bigint(); + router['assignmentCache'].delete('org_2'); + router['orgConns'].delete('org_2'); + router['orgModels'].delete('org_2'); + await router.getOrgModels('org_2'); + const coldNs = Number(process.hrtime.bigint() - coldStart) / 1e6; + + const times: number[] = []; + for (let i = 0; i < iterations; i++) { + const t0 = process.hrtime.bigint(); + await router.getOrgModels('org_1'); + times.push(Number(process.hrtime.bigint() - t0) / 1e6); + } + times.sort((a, b) => a - b); + + const avg = times.reduce((s, v) => s + v, 0) / times.length; + const p95 = times[Math.floor(times.length * 0.95)]; + + console.log(`[Sharding] Routing overhead:`); + console.log(` Cold (cache miss + DB lookup + model registration): ${coldNs.toFixed(2)}ms`); + console.log( + ` Warm cache hit (${iterations} iters): avg=${avg.toFixed(4)}ms, p95=${p95.toFixed(4)}ms`, + ); + + expect(avg).toBeLessThan(1); + }); + }); + + describe('bulk provisioning simulation', () => { + it('provisions all 10 assigned orgs with collections + indexes', async () => { + const orgIds = Array.from({ length: 10 }, (_, i) => `org_${i + 1}`); + const results: { orgId: string; pool: string; ms: number }[] = []; + + const totalStart = Date.now(); + for (const orgId of orgIds) { + const pool = router.getAssignment(orgId); + const ms = await router.initializeOrg(orgId); + results.push({ orgId, pool: pool ?? 
'?', ms }); + } + const totalMs = Date.now() - totalStart; + + console.log(`[Sharding] Bulk provisioned ${orgIds.length} orgs in ${totalMs}ms:`); + const poolATimes = results.filter((r) => r.pool === POOL_A).map((r) => r.ms); + const poolBTimes = results.filter((r) => r.pool === POOL_B).map((r) => r.ms); + const avgA = poolATimes.reduce((s, v) => s + v, 0) / poolATimes.length; + const avgB = poolBTimes.reduce((s, v) => s + v, 0) / poolBTimes.length; + console.log(` Pool A (${poolATimes.length} orgs): avg ${Math.round(avgA)}ms/org`); + console.log(` Pool B (${poolBTimes.length} orgs): avg ${Math.round(avgB)}ms/org`); + console.log(` Total: ${totalMs}ms (${Math.round(totalMs / orgIds.length)}ms/org)`); + + expect(results.every((r) => r.ms > 0)).toBe(true); + }, 120_000); + }); + + describe('simulated Express middleware pattern', () => { + it('demonstrates the request-scoped getModel pattern', async () => { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const fakeReq = { orgId: 'org_1' } as { + orgId: string; + getModel?: (name: string) => Promise>; + }; + + fakeReq.getModel = (modelName: string) => router.getModel(fakeReq.orgId, modelName); + + const User = await fakeReq.getModel!('User'); + const user = await User.findOne({ email: 'alice@org1.test' }).lean(); + expect((user as Record).name).toBe('Alice'); + + fakeReq.orgId = 'org_6'; + const User6 = await fakeReq.getModel!('User'); + const user6 = await User6.findOne({ email: 'bob@org6.test' }).lean(); + expect((user6 as Record).name).toBe('Bob'); + }); + }); +}); diff --git a/packages/data-schemas/misc/ferretdb/tsconfig.json b/packages/data-schemas/misc/ferretdb/tsconfig.json new file mode 100644 index 0000000000..ddd1855bd4 --- /dev/null +++ b/packages/data-schemas/misc/ferretdb/tsconfig.json @@ -0,0 +1,14 @@ +{ + "extends": "../../tsconfig.json", + "compilerOptions": { + "noEmit": true, + "target": "ES2020", + "lib": ["ES2020"], + "baseUrl": "../..", + "paths": { + "~/*": ["./src/*"] + } + }, + "include": ["./**/*.ts"], + "exclude": ["node_modules"] +} diff --git a/packages/data-schemas/src/methods/aclEntry.ts b/packages/data-schemas/src/methods/aclEntry.ts index c1848960cc..ff27a7046f 100644 --- a/packages/data-schemas/src/methods/aclEntry.ts +++ b/packages/data-schemas/src/methods/aclEntry.ts @@ -307,7 +307,9 @@ export function createAclEntryMethods(mongoose: typeof import('mongoose')) { } if (removeBits) { - if (!update.$bit) update.$bit = {}; + if (!update.$bit) { + update.$bit = {}; + } const bitUpdate = update.$bit as Record; bitUpdate.permBits = { ...(bitUpdate.permBits as Record), and: ~removeBits }; } diff --git a/packages/data-schemas/src/methods/userGroup.ts b/packages/data-schemas/src/methods/userGroup.ts index bec28343fe..f6b57095dc 100644 --- a/packages/data-schemas/src/methods/userGroup.ts +++ b/packages/data-schemas/src/methods/userGroup.ts @@ -215,7 +215,7 @@ export function createUserGroupMethods(mongoose: typeof import('mongoose')) { const userIdOnTheSource = user.idOnTheSource || userId.toString(); const updatedGroup = await Group.findByIdAndUpdate( groupId, - { $pull: { memberIds: userIdOnTheSource } }, + { $pullAll: { memberIds: [userIdOnTheSource] } }, options, ).lean(); diff --git a/packages/data-schemas/src/utils/retry.ts b/packages/data-schemas/src/utils/retry.ts new file mode 100644 index 0000000000..55becf76ac --- /dev/null +++ b/packages/data-schemas/src/utils/retry.ts @@ -0,0 +1,122 @@ +import logger from '~/config/winston'; + +interface RetryOptions { + maxAttempts?: number; + 
+  baseDelayMs?: number;
+  maxDelayMs?: number;
+  jitter?: boolean;
+  retryableErrors?: string[];
+  onRetry?: (error: Error, attempt: number, delayMs: number) => void;
+}
+
+const DEFAULT_OPTIONS: Required<Omit<RetryOptions, 'onRetry'>> = {
+  maxAttempts: 5,
+  baseDelayMs: 100,
+  maxDelayMs: 10_000,
+  jitter: true,
+  retryableErrors: ['deadlock', 'lock timeout', 'write conflict', 'ECONNRESET'],
+};
+
+/**
+ * Executes an async operation with exponential backoff + jitter retry
+ * on transient errors (deadlocks, connection resets, lock timeouts).
+ *
+ * Designed for FerretDB/DocumentDB operations where concurrent index
+ * creation or bulk writes can trigger PostgreSQL-level deadlocks.
+ */
+export async function retryWithBackoff<T>(
+  operation: () => Promise<T>,
+  label: string,
+  options: RetryOptions = {},
+): Promise<T> {
+  const {
+    maxAttempts = DEFAULT_OPTIONS.maxAttempts,
+    baseDelayMs = DEFAULT_OPTIONS.baseDelayMs,
+    maxDelayMs = DEFAULT_OPTIONS.maxDelayMs,
+    jitter = DEFAULT_OPTIONS.jitter,
+    retryableErrors = DEFAULT_OPTIONS.retryableErrors,
+  } = options;
+
+  if (maxAttempts < 1 || baseDelayMs < 0 || maxDelayMs < 0) {
+    throw new Error(
+      `[retryWithBackoff] Invalid options: maxAttempts must be >= 1, delays must be non-negative`,
+    );
+  }
+
+  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
+    try {
+      return await operation();
+    } catch (err: unknown) {
+      const message = (err as Error)?.message ?? String(err);
+      const isRetryable = retryableErrors.some((pattern) =>
+        message.toLowerCase().includes(pattern.toLowerCase()),
+      );
+
+      if (!isRetryable || attempt === maxAttempts) {
+        logger.error(
+          `[retryWithBackoff] ${label} failed permanently after ${attempt} attempt(s): ${message}`,
+        );
+        throw err;
+      }
+
+      const exponentialDelay = baseDelayMs * Math.pow(2, attempt - 1);
+      const jitterMs = jitter ? Math.random() * baseDelayMs : 0;
+      const delayMs = Math.min(exponentialDelay + jitterMs, maxDelayMs);
+
+      logger.warn(
+        `[retryWithBackoff] ${label} attempt ${attempt}/${maxAttempts} failed (${message}), retrying in ${Math.round(delayMs)}ms`,
+      );
+
+      if (options.onRetry) {
+        const normalizedError = err instanceof Error ? err : new Error(String(err));
+        options.onRetry(normalizedError, attempt, delayMs);
+      }
+
+      await new Promise((resolve) => setTimeout(resolve, delayMs));
+    }
+  }
+}
+
+/**
+ * Creates all indexes for a Mongoose model with deadlock retry.
+ * Use this instead of raw `model.createIndexes()` on FerretDB.
+ */
+export async function createIndexesWithRetry(
+  model: { createIndexes: () => Promise<unknown>; modelName: string },
+  options: RetryOptions = {},
+): Promise<void> {
+  await retryWithBackoff(
+    () => model.createIndexes() as Promise<void>,
+    `createIndexes(${model.modelName})`,
+    options,
+  );
+}
+
+/**
+ * Initializes all collections and indexes for a set of models on a connection,
+ * with per-model deadlock retry. Models are processed sequentially to minimize
+ * contention on the DocumentDB catalog.
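+ *
+ * A minimal usage sketch, not part of this module's API: it assumes an org-scoped
+ * Mongoose connection whose standard `connection.models` registry holds that org's
+ * models (`orgConnection` is an illustrative name, not defined here).
+ *
+ * @example
+ * // Provision every collection and index for one org, retrying transient deadlocks.
+ * const { totalMs, perModel } = await initializeOrgCollections(orgConnection.models, {
+ *   maxAttempts: 5,
+ *   baseDelayMs: 100,
+ * });
+ * console.log(`provisioned ${perModel.length} collections in ${totalMs}ms`);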
+ */
+export async function initializeOrgCollections(
+  models: Record<
+    string,
+    {
+      createCollection: () => Promise<unknown>;
+      createIndexes: () => Promise<unknown>;
+      modelName: string;
+    }
+  >,
+  options: RetryOptions = {},
+): Promise<{ totalMs: number; perModel: Array<{ name: string; ms: number }> }> {
+  const perModel: Array<{ name: string; ms: number }> = [];
+  const t0 = Date.now();
+
+  for (const model of Object.values(models)) {
+    const modelStart = Date.now();
+    await model.createCollection();
+    await createIndexesWithRetry(model, options);
+    perModel.push({ name: model.modelName, ms: Date.now() - modelStart });
+  }
+
+  return { totalMs: Date.now() - t0, perModel };
+}
diff --git a/packages/data-schemas/src/utils/transactions.ts b/packages/data-schemas/src/utils/transactions.ts
index 09bbb040c1..cc3d0f2aee 100644
--- a/packages/data-schemas/src/utils/transactions.ts
+++ b/packages/data-schemas/src/utils/transactions.ts
@@ -16,10 +16,16 @@ export const supportsTransactions = async (
 
     await mongoose.connection.db?.collection('__transaction_test__').findOne({}, { session });
 
-    await session.abortTransaction();
+    await session.commitTransaction();
     logger.debug('MongoDB transactions are supported');
     return true;
   } catch (transactionError: unknown) {
+    try {
+      await session.abortTransaction();
+    } catch (abortError) {
+      /** best-effort abort */
+      logger.error(`[supportsTransactions] Error aborting transaction:`, abortError);
+    }
     logger.debug(
       'MongoDB transactions not supported (transaction error):',
       (transactionError as Error)?.message || 'Unknown error',