Mirror of https://github.com/danny-avila/LibreChat.git (synced 2026-02-20)
* feat: replace unsupported MongoDB aggregation operators for FerretDB compatibility

  Replace $lookup, $unwind, $sample, $replaceRoot, and $addFields aggregation stages, which are unsupported on FerretDB v2.x (postgres-documentdb backend).

  - Prompt.js: Replace $lookup/$unwind/$project pipelines with find().select().lean() + attachProductionPrompts() batch helper. Replace $group/$replaceRoot/$sample in getRandomPromptGroups with distinct() + Fisher-Yates shuffle.
  - Agent/Prompt migration scripts: Replace $lookup anti-join pattern with distinct() + $nin two-step queries for finding un-migrated resources.

  All replacement patterns verified against FerretDB v2.7.0.

  Co-authored-by: Cursor <cursoragent@cursor.com>

* fix: use $pullAll for simple array removals, fix memberIds type mismatches

  Replace $pull with $pullAll for exact-value scalar array removals. Both operators work on MongoDB and FerretDB, but $pullAll is more explicit for exact matching (no condition expressions).

  Fix critical type mismatch bugs where ObjectId values were used against String[] memberIds arrays in Group queries:

  - config/delete-user.js: use string uid instead of ObjectId user._id
  - e2e/setup/cleanupUser.ts: convert userId.toString() before query

  Harden PermissionService.bulkUpdateResourcePermissions abort handling to prevent a crash when abortTransaction is called after commitTransaction.

  All changes verified against FerretDB v2.7.0 and MongoDB Memory Server.

  Co-authored-by: Cursor <cursoragent@cursor.com>

* fix: harden transaction support probe for FerretDB compatibility

  Commit the transaction before aborting in the supportsTransactions probe, and wrap abortTransaction in try-catch to prevent crashes when abort is called after a successful commit (observed behavior on FerretDB).

  Co-authored-by: Cursor <cursoragent@cursor.com>

* feat: add FerretDB compatibility test suite, retry utilities, and CI config

  Add comprehensive FerretDB integration test suite covering:

  - $pullAll scalar array operations
  - $pull with subdocument conditions
  - $lookup replacement (find + manual join)
  - $sample replacement (distinct + Fisher-Yates)
  - $bit and $bitsAllSet operations
  - Migration anti-join pattern
  - Multi-tenancy (useDb, scaling, write amplification)
  - Sharding proof-of-concept
  - Production operations (backup/restore, schema migration, deadlock retry)

  Add production retryWithBackoff utility for deadlock recovery during concurrent index creation on FerretDB/DocumentDB backends.

  Add UserController.spec.js tests for deleteUserController (runs in CI).

  Configure jest and eslint to isolate FerretDB tests from CI pipelines:

  - packages/data-schemas/jest.config.mjs: ignore misc/ directory
  - eslint.config.mjs: ignore packages/data-schemas/misc/

  Include Docker Compose config for local FerretDB v2.7 + postgres-documentdb, dedicated jest/tsconfig for the test files, and multi-tenancy findings doc.

  Co-authored-by: Cursor <cursoragent@cursor.com>

* style: brace formatting in aclEntry.ts modifyPermissionBits

  Co-authored-by: Cursor <cursoragent@cursor.com>

* refactor: reorganize retry utilities and update imports

  - Moved retryWithBackoff utility to a new file `retry.ts` for better structure.
  - Updated imports in `orgOperations.ferretdb.spec.ts` to reflect the new location of retry utilities.
  - Removed old import statement for retryWithBackoff from index.ts to streamline exports.
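The distinct() + Fisher-Yates replacement for $sample is the least obvious of these rewrites. Below is a minimal sketch of the pattern against a generic Mongoose model; the helper name and parameters are illustrative only, not the actual Prompt.js code.

import { FilterQuery, Model, Types } from 'mongoose';

/**
 * Pick up to `limit` random documents without the $sample aggregation stage:
 * fetch the candidate _ids with distinct(), shuffle them in memory with
 * Fisher-Yates, then load only the selected documents via a plain find().
 */
async function sampleWithoutAggregation<T>(
  model: Model<T>,
  filter: FilterQuery<T>,
  limit: number,
): Promise<T[]> {
  // distinct() is supported on FerretDB and returns only the candidate ids.
  const ids = (await model.distinct('_id', filter)) as Types.ObjectId[];

  // In-place Fisher-Yates shuffle over the id array.
  for (let i = ids.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [ids[i], ids[j]] = [ids[j], ids[i]];
  }

  // Load the first `limit` shuffled ids; order within the result is not guaranteed.
  return model.find({ _id: { $in: ids.slice(0, limit) } }).lean<T[]>();
}

The trade-off is that every matching _id is pulled into application memory before sampling, which is acceptable for prompt groups but would not suit very large collections.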
* test: add $pullAll coverage for ConversationTag and PermissionService

  Add integration tests for deleteConversationTag verifying $pullAll removes tags from conversations correctly, and for syncUserEntraGroupMemberships verifying $pullAll removes user from non-matching Entra groups while preserving local group membership.

  Co-authored-by: Cursor <cursoragent@cursor.com>
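The practical point these tests pin down is that $pullAll removes exact values from an array, with no condition expressions, and behaves the same on MongoDB and FerretDB v2.x. A hedged sketch of the kind of update the ConversationTag coverage exercises follows; the collection and field names here are assumptions based on the description above, not the repository's actual model code.

import { Connection } from 'mongoose';

/**
 * Remove a single tag string from every conversation owned by a user,
 * using $pullAll (exact-value removal) rather than $pull.
 */
async function removeTagFromConversations(
  conn: Connection,
  userId: string,
  tag: string,
): Promise<number> {
  const result = await conn
    .collection('conversations')
    .updateMany({ user: userId }, { $pullAll: { tags: [tag] } });
  return result.modifiedCount;
}

Because $pullAll matches whole values only, a conditional removal (for example, pulling subdocuments by a field match) still needs $pull with a condition object.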
649 lines · 22 KiB · TypeScript
import mongoose from 'mongoose';
import { execSync } from 'child_process';
import {
  actionSchema,
  agentSchema,
  agentApiKeySchema,
  agentCategorySchema,
  assistantSchema,
  balanceSchema,
  bannerSchema,
  conversationTagSchema,
  convoSchema,
  fileSchema,
  keySchema,
  messageSchema,
  pluginAuthSchema,
  presetSchema,
  projectSchema,
  promptSchema,
  promptGroupSchema,
  roleSchema,
  sessionSchema,
  shareSchema,
  tokenSchema,
  toolCallSchema,
  transactionSchema,
  userSchema,
  memorySchema,
  groupSchema,
} from '~/schema';
import accessRoleSchema from '~/schema/accessRole';
import aclEntrySchema from '~/schema/aclEntry';
import mcpServerSchema from '~/schema/mcpServer';

/**
 * FerretDB Multi-Tenancy Benchmark
 *
 * Validates whether FerretDB can handle LibreChat's multi-tenancy model
 * at scale using database-per-org isolation via Mongoose useDb().
 *
 * Phases:
 * 1. useDb schema mapping — verifies per-org PostgreSQL schema creation and data isolation
 * 2. Index initialization — validates all 29 collections + 97 indexes, tests for deadlocks
 * 3. Scaling curve — measures catalog growth, init time, and query latency at 10/50/100 orgs
 * 4. Write amplification — compares update cost on high-index vs zero-index collections
 * 5. Shared-collection alternative — benchmarks orgId-discriminated shared collections
 *
 * Run:
 *   FERRETDB_URI="mongodb://ferretdb:ferretdb@127.0.0.1:27020/mt_bench" \
 *   npx jest multiTenancy.ferretdb --testTimeout=600000
 *
 * Env vars:
 *   FERRETDB_URI — Required. FerretDB connection string.
 *   PG_CONTAINER — Docker container name for psql (default: librechat-ferretdb-postgres-1)
 *   SCALE_TIERS — Comma-separated org counts (default: 10,50,100)
 *   WRITE_AMP_DOCS — Number of docs for write amp test (default: 200)
 */

const FERRETDB_URI = process.env.FERRETDB_URI;
const describeIfFerretDB = FERRETDB_URI ? describe : describe.skip;

const PG_CONTAINER = process.env.PG_CONTAINER || 'librechat-ferretdb-postgres-1';
const PG_USER = 'ferretdb';
const ORG_PREFIX = 'mt_bench_';

const DEFAULT_TIERS = [10, 50, 100];
const SCALE_TIERS: number[] = process.env.SCALE_TIERS
  ? process.env.SCALE_TIERS.split(',').map(Number)
  : DEFAULT_TIERS;

const WRITE_AMP_DOCS = parseInt(process.env.WRITE_AMP_DOCS || '200', 10);

/** All 29 LibreChat schemas by Mongoose model name */
const MODEL_SCHEMAS: Record<string, mongoose.Schema> = {
  User: userSchema,
  Token: tokenSchema,
  Session: sessionSchema,
  Balance: balanceSchema,
  Conversation: convoSchema,
  Message: messageSchema,
  Agent: agentSchema,
  AgentApiKey: agentApiKeySchema,
  AgentCategory: agentCategorySchema,
  MCPServer: mcpServerSchema,
  Role: roleSchema,
  Action: actionSchema,
  Assistant: assistantSchema,
  File: fileSchema,
  Banner: bannerSchema,
  Project: projectSchema,
  Key: keySchema,
  PluginAuth: pluginAuthSchema,
  Transaction: transactionSchema,
  Preset: presetSchema,
  Prompt: promptSchema,
  PromptGroup: promptGroupSchema,
  ConversationTag: conversationTagSchema,
  SharedLink: shareSchema,
  ToolCall: toolCallSchema,
  MemoryEntry: memorySchema,
  AccessRole: accessRoleSchema,
  AclEntry: aclEntrySchema,
  Group: groupSchema,
};

const MODEL_COUNT = Object.keys(MODEL_SCHEMAS).length;

/** Register all 29 models on a given Mongoose Connection */
function registerModels(conn: mongoose.Connection): Record<string, mongoose.Model<unknown>> {
  const models: Record<string, mongoose.Model<unknown>> = {};
  for (const [name, schema] of Object.entries(MODEL_SCHEMAS)) {
    models[name] = conn.models[name] || conn.model(name, schema);
  }
  return models;
}

/** Initialize one org database: create all collections then build all indexes sequentially */
async function initializeOrgDb(conn: mongoose.Connection): Promise<{
  models: Record<string, mongoose.Model<unknown>>;
  durationMs: number;
}> {
  const models = registerModels(conn);
  const start = Date.now();
  for (const model of Object.values(models)) {
    await model.createCollection();
    await model.createIndexes();
  }
  return { models, durationMs: Date.now() - start };
}

/** Execute a psql command against the FerretDB PostgreSQL backend via docker exec */
function psql(query: string): string {
  try {
    const escaped = query.replace(/"/g, '\\"');
    return execSync(
      `docker exec ${PG_CONTAINER} psql -U ${PG_USER} -d postgres -t -A -c "${escaped}"`,
      { encoding: 'utf-8', timeout: 30_000 },
    ).trim();
  } catch {
    return '';
  }
}

/**
 * Snapshot of DocumentDB catalog + PostgreSQL system catalog sizes.
 * FerretDB with DocumentDB stores all data in a single `documentdb_data` schema.
 * Each MongoDB collection → `documents_<id>` + `retry_<id>` table pair.
 * The catalog lives in `documentdb_api_catalog.collections` and `.collection_indexes`.
 */
function catalogMetrics() {
  return {
    collections: parseInt(psql('SELECT count(*) FROM documentdb_api_catalog.collections'), 10) || 0,
    databases:
      parseInt(
        psql('SELECT count(DISTINCT database_name) FROM documentdb_api_catalog.collections'),
        10,
      ) || 0,
    catalogIndexes:
      parseInt(psql('SELECT count(*) FROM documentdb_api_catalog.collection_indexes'), 10) || 0,
    dataTables:
      parseInt(
        psql(
          "SELECT count(*) FROM information_schema.tables WHERE table_schema = 'documentdb_data'",
        ),
        10,
      ) || 0,
    pgClassTotal: parseInt(psql('SELECT count(*) FROM pg_class'), 10) || 0,
    pgStatRows: parseInt(psql('SELECT count(*) FROM pg_statistic'), 10) || 0,
  };
}

/** Measure point-query latency over N iterations and return percentile stats */
async function measureLatency(
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  model: mongoose.Model<any>,
  filter: Record<string, unknown>,
  iterations = 50,
) {
  await model.findOne(filter).lean();

  const times: number[] = [];
  for (let i = 0; i < iterations; i++) {
    const t0 = process.hrtime.bigint();
    await model.findOne(filter).lean();
    times.push(Number(process.hrtime.bigint() - t0) / 1e6);
  }

  times.sort((a, b) => a - b);
  return {
    min: times[0],
    max: times[times.length - 1],
    median: times[Math.floor(times.length / 2)],
    p95: times[Math.floor(times.length * 0.95)],
    avg: times.reduce((s, v) => s + v, 0) / times.length,
  };
}

function fmt(n: number): string {
  return n.toFixed(2);
}

describeIfFerretDB('FerretDB Multi-Tenancy Benchmark', () => {
  const createdDbs: string[] = [];

  beforeAll(async () => {
    await mongoose.connect(FERRETDB_URI as string, { autoIndex: false });
  });

  afterAll(async () => {
    for (const db of createdDbs) {
      try {
        await mongoose.connection.useDb(db, { useCache: false }).dropDatabase();
      } catch {
        /* best-effort cleanup */
      }
    }
    try {
      await mongoose.connection.dropDatabase();
    } catch {
      /* best-effort */
    }
    await mongoose.disconnect();
  }, 600_000);

  // ─── PHASE 1: DATABASE-PER-ORG SCHEMA MAPPING ────────────────────────────

  describe('Phase 1: useDb Schema Mapping', () => {
    const org1Db = `${ORG_PREFIX}iso_1`;
    const org2Db = `${ORG_PREFIX}iso_2`;
    let org1Models: Record<string, mongoose.Model<unknown>>;
    let org2Models: Record<string, mongoose.Model<unknown>>;

    beforeAll(() => {
      createdDbs.push(org1Db, org2Db);
    });

    it('creates separate databases with all 29 collections via useDb()', async () => {
      const c1 = mongoose.connection.useDb(org1Db, { useCache: true });
      const c2 = mongoose.connection.useDb(org2Db, { useCache: true });

      const r1 = await initializeOrgDb(c1);
      const r2 = await initializeOrgDb(c2);
      org1Models = r1.models;
      org2Models = r2.models;

      console.log(`[Phase 1] org1 init: ${r1.durationMs}ms | org2 init: ${r2.durationMs}ms`);

      expect(Object.keys(org1Models)).toHaveLength(MODEL_COUNT);
      expect(Object.keys(org2Models)).toHaveLength(MODEL_COUNT);
    }, 120_000);

    it('maps each useDb database to a separate entry in the DocumentDB catalog', () => {
      const raw = psql(
        `SELECT database_name FROM documentdb_api_catalog.collections WHERE database_name LIKE '${ORG_PREFIX}%' GROUP BY database_name ORDER BY database_name`,
      );
      const dbNames = raw.split('\n').filter(Boolean);
      console.log('[Phase 1] DocumentDB databases:', dbNames);

      expect(dbNames).toContain(org1Db);
      expect(dbNames).toContain(org2Db);

      const perDb = psql(
        `SELECT database_name, count(*) FROM documentdb_api_catalog.collections WHERE database_name LIKE '${ORG_PREFIX}%' GROUP BY database_name ORDER BY database_name`,
      );
      console.log('[Phase 1] Collections per database:\n' + perDb);
    });

    it('isolates data between org databases', async () => {
      await org1Models.User.create({
        name: 'Org1 User',
        email: 'u@org1.test',
        username: 'org1user',
      });
      await org2Models.User.create({
        name: 'Org2 User',
        email: 'u@org2.test',
        username: 'org2user',
      });

      const u1 = await org1Models.User.find({}).lean();
      const u2 = await org2Models.User.find({}).lean();

      expect(u1).toHaveLength(1);
      expect(u2).toHaveLength(1);
      expect((u1[0] as Record<string, unknown>).email).toBe('u@org1.test');
      expect((u2[0] as Record<string, unknown>).email).toBe('u@org2.test');
    }, 30_000);
  });

  // ─── PHASE 2: INDEX INITIALIZATION ────────────────────────────────────────

  describe('Phase 2: Index Initialization', () => {
    const seqDb = `${ORG_PREFIX}idx_seq`;

    beforeAll(() => {
      createdDbs.push(seqDb);
    });

    it('creates all indexes sequentially and reports per-model breakdown', async () => {
      const conn = mongoose.connection.useDb(seqDb, { useCache: true });
      const models = registerModels(conn);

      const stats: { name: string; ms: number; idxCount: number }[] = [];
      for (const [name, model] of Object.entries(models)) {
        const t0 = Date.now();
        await model.createCollection();
        await model.createIndexes();
        const idxs = await model.collection.indexes();
        stats.push({ name, ms: Date.now() - t0, idxCount: idxs.length - 1 });
      }

      const totalMs = stats.reduce((s, r) => s + r.ms, 0);
      const totalIdx = stats.reduce((s, r) => s + r.idxCount, 0);

      console.log(`[Phase 2] Sequential: ${totalMs}ms total, ${totalIdx} custom indexes`);
      console.log('[Phase 2] Slowest 10:');
      for (const s of stats.sort((a, b) => b.ms - a.ms).slice(0, 10)) {
        console.log(`  ${s.name.padEnd(20)} ${String(s.idxCount).padStart(3)} indexes  ${s.ms}ms`);
      }

      expect(totalIdx).toBeGreaterThanOrEqual(90);
    }, 120_000);

    it('tests concurrent index creation for deadlock risk', async () => {
      const concDb = `${ORG_PREFIX}idx_conc`;
      createdDbs.push(concDb);
      const conn = mongoose.connection.useDb(concDb, { useCache: false });
      const models = registerModels(conn);

      for (const model of Object.values(models)) {
        await model.createCollection();
      }

      const t0 = Date.now();
      try {
        await Promise.all(Object.values(models).map((m) => m.createIndexes()));
        console.log(`[Phase 2] Concurrent: ${Date.now() - t0}ms — no deadlock`);
      } catch (err) {
        console.warn(
          `[Phase 2] Concurrent: DEADLOCKED after ${Date.now() - t0}ms — ${(err as Error).message}`,
        );
      }
    }, 120_000);

    it('verifies sparse, partial, and TTL index types on FerretDB', async () => {
      const conn = mongoose.connection.useDb(seqDb, { useCache: true });

      const userIdxs = await conn.model('User').collection.indexes();
      const sparseCount = userIdxs.filter((i: Record<string, unknown>) => i.sparse).length;
      const ttlCount = userIdxs.filter(
        (i: Record<string, unknown>) => i.expireAfterSeconds !== undefined,
      ).length;
      console.log(
        `[Phase 2] User: ${userIdxs.length} total, ${sparseCount} sparse, ${ttlCount} TTL`,
      );
      expect(sparseCount).toBeGreaterThanOrEqual(8);

      const fileIdxs = await conn.model('File').collection.indexes();
      const partialFile = fileIdxs.find(
        (i: Record<string, unknown>) => i.partialFilterExpression != null,
      );
      console.log(`[Phase 2] File partialFilterExpression: ${partialFile ? 'YES' : 'NO'}`);
      expect(partialFile).toBeDefined();

      const groupIdxs = await conn.model('Group').collection.indexes();
      const sparseGroup = groupIdxs.find((i: Record<string, unknown>) => i.sparse);
      const partialGroup = groupIdxs.find(
        (i: Record<string, unknown>) => i.partialFilterExpression != null,
      );
      console.log(
        `[Phase 2] Group: sparse=${sparseGroup ? 'YES' : 'NO'}, partial=${partialGroup ? 'YES' : 'NO'}`,
      );
      expect(sparseGroup).toBeDefined();
      expect(partialGroup).toBeDefined();
    }, 60_000);
  });

  // ─── PHASE 3: SCALING CURVE ───────────────────────────────────────────────

  describe('Phase 3: Scaling Curve', () => {
    interface TierResult {
      tier: number;
      batchMs: number;
      avgPerOrg: number;
      catalog: ReturnType<typeof catalogMetrics>;
      latency: Awaited<ReturnType<typeof measureLatency>>;
    }

    const tierResults: TierResult[] = [];
    let orgsCreated = 0;
    let firstOrgConn: mongoose.Connection | null = null;

    beforeAll(() => {
      const baseline = catalogMetrics();
      console.log(
        `[Phase 3] Baseline — collections: ${baseline.collections}, ` +
          `databases: ${baseline.databases}, catalog indexes: ${baseline.catalogIndexes}, ` +
          `data tables: ${baseline.dataTables}, pg_class: ${baseline.pgClassTotal}`,
      );
    });

    it.each(SCALE_TIERS)(
      'scales to %i orgs',
      async (target) => {
        const t0 = Date.now();

        for (let i = orgsCreated + 1; i <= target; i++) {
          const dbName = `${ORG_PREFIX}s${i}`;
          createdDbs.push(dbName);

          const conn = mongoose.connection.useDb(dbName, { useCache: i === 1 });
          if (i === 1) {
            firstOrgConn = conn;
          }

          const models = registerModels(conn);
          for (const model of Object.values(models)) {
            await model.createCollection();
            await model.createIndexes();
          }

          if (i === 1) {
            await models.User.create({
              name: 'Latency Probe',
              email: 'probe@scale.test',
              username: 'probe',
            });
          }

          if (i % 10 === 0) {
            process.stdout.write(`  ${i}/${target} orgs\n`);
          }
        }

        const batchMs = Date.now() - t0;
        const batchSize = target - orgsCreated;
        orgsCreated = target;

        const lat = await measureLatency(firstOrgConn!.model('User'), {
          email: 'probe@scale.test',
        });
        const cat = catalogMetrics();

        tierResults.push({
          tier: target,
          batchMs,
          avgPerOrg: batchSize > 0 ? Math.round(batchMs / batchSize) : 0,
          catalog: cat,
          latency: lat,
        });

        console.log(`\n[Phase 3] === ${target} orgs ===`);
        console.log(
          `  Init: ${batchMs}ms total (${batchSize > 0 ? Math.round(batchMs / batchSize) : 0}ms/org, batch=${batchSize})`,
        );
        console.log(
          `  Query: avg=${fmt(lat.avg)}ms median=${fmt(lat.median)}ms p95=${fmt(lat.p95)}ms`,
        );
        console.log(
          `  Catalog: ${cat.collections} collections, ${cat.catalogIndexes} indexes, ` +
            `${cat.dataTables} data tables, pg_class=${cat.pgClassTotal}`,
        );

        expect(cat.collections).toBeGreaterThan(0);
      },
      600_000,
    );

    afterAll(() => {
      if (tierResults.length === 0) {
        return;
      }

      const hdr = [
        'Orgs',
        'Colls',
        'CatIdx',
        'DataTbls',
        'pg_class',
        'Init/org',
        'Qry avg',
        'Qry p95',
      ];
      const w = [8, 10, 10, 10, 12, 12, 12, 12];

      console.log('\n[Phase 3] SCALING SUMMARY');
      console.log('─'.repeat(w.reduce((a, b) => a + b)));
      console.log(hdr.map((h, i) => h.padEnd(w[i])).join(''));
      console.log('─'.repeat(w.reduce((a, b) => a + b)));

      for (const r of tierResults) {
        const row = [
          String(r.tier),
          String(r.catalog.collections),
          String(r.catalog.catalogIndexes),
          String(r.catalog.dataTables),
          String(r.catalog.pgClassTotal),
          `${r.avgPerOrg}ms`,
          `${fmt(r.latency.avg)}ms`,
          `${fmt(r.latency.p95)}ms`,
        ];
        console.log(row.map((v, i) => v.padEnd(w[i])).join(''));
      }
      console.log('─'.repeat(w.reduce((a, b) => a + b)));
    });
  });

  // ─── PHASE 4: WRITE AMPLIFICATION ────────────────────────────────────────

  describe('Phase 4: Write Amplification', () => {
    it('compares update cost: high-index (User, 11+ idx) vs zero-index collection', async () => {
      const db = `${ORG_PREFIX}wamp`;
      createdDbs.push(db);
      const conn = mongoose.connection.useDb(db, { useCache: false });

      const HighIdx = conn.model('User', userSchema);
      await HighIdx.createCollection();
      await HighIdx.createIndexes();

      const bareSchema = new mongoose.Schema({ name: String, email: String, ts: Date });
      const LowIdx = conn.model('BareDoc', bareSchema);
      await LowIdx.createCollection();

      const N = WRITE_AMP_DOCS;

      await HighIdx.insertMany(
        Array.from({ length: N }, (_, i) => ({
          name: `U${i}`,
          email: `u${i}@wamp.test`,
          username: `u${i}`,
        })),
      );
      await LowIdx.insertMany(
        Array.from({ length: N }, (_, i) => ({
          name: `U${i}`,
          email: `u${i}@wamp.test`,
          ts: new Date(),
        })),
      );

      const walBefore = psql('SELECT wal_bytes FROM pg_stat_wal');

      const highStart = Date.now();
      for (let i = 0; i < N; i++) {
        await HighIdx.updateOne({ email: `u${i}@wamp.test` }, { $set: { name: `X${i}` } });
      }
      const highMs = Date.now() - highStart;

      const walMid = psql('SELECT wal_bytes FROM pg_stat_wal');

      const lowStart = Date.now();
      for (let i = 0; i < N; i++) {
        await LowIdx.updateOne({ email: `u${i}@wamp.test` }, { $set: { name: `X${i}` } });
      }
      const lowMs = Date.now() - lowStart;

      const walAfter = psql('SELECT wal_bytes FROM pg_stat_wal');

      console.log(`\n[Phase 4] Write Amplification (${N} updates each)`);
      console.log(`  High-index (User, 11+ idx): ${highMs}ms (${fmt(highMs / N)}ms/op)`);
      console.log(`  Zero-index (bare):          ${lowMs}ms (${fmt(lowMs / N)}ms/op)`);
      console.log(`  Time ratio: ${fmt(highMs / Math.max(lowMs, 1))}x`);

      if (walBefore && walMid && walAfter) {
        const wHigh = BigInt(walMid) - BigInt(walBefore);
        const wLow = BigInt(walAfter) - BigInt(walMid);
        console.log(`  WAL: high-idx=${wHigh} bytes, bare=${wLow} bytes`);
        if (wLow > BigInt(0)) {
          console.log(`  WAL ratio: ${fmt(Number(wHigh) / Number(wLow))}x`);
        }
      }

      expect(highMs).toBeGreaterThan(0);
      expect(lowMs).toBeGreaterThan(0);
    }, 300_000);
  });

  // ─── PHASE 5: SHARED-COLLECTION ALTERNATIVE ──────────────────────────────

  describe('Phase 5: Shared Collection Alternative', () => {
    it('benchmarks shared collection with orgId discriminator field', async () => {
      const db = `${ORG_PREFIX}shared`;
      createdDbs.push(db);
      const conn = mongoose.connection.useDb(db, { useCache: false });

      const sharedSchema = new mongoose.Schema({
        orgId: { type: String, required: true, index: true },
        name: String,
        email: String,
        username: String,
        provider: { type: String, default: 'local' },
        role: { type: String, default: 'USER' },
      });
      sharedSchema.index({ orgId: 1, email: 1 }, { unique: true });

      const Shared = conn.model('SharedUser', sharedSchema);
      await Shared.createCollection();
      await Shared.createIndexes();

      const ORG_N = 100;
      const USERS_PER = 50;

      const docs = [];
      for (let o = 0; o < ORG_N; o++) {
        for (let u = 0; u < USERS_PER; u++) {
          docs.push({
            orgId: `org_${o}`,
            name: `User ${u}`,
            email: `u${u}@o${o}.test`,
            username: `u${u}_o${o}`,
          });
        }
      }

      const insertT0 = Date.now();
      await Shared.insertMany(docs, { ordered: false });
      const insertMs = Date.now() - insertT0;

      const totalDocs = ORG_N * USERS_PER;
      console.log(`\n[Phase 5] Shared collection: ${totalDocs} docs inserted in ${insertMs}ms`);

      const pointLat = await measureLatency(Shared, {
        orgId: 'org_50',
        email: 'u25@o50.test',
      });
      console.log(
        `  Point query: avg=${fmt(pointLat.avg)}ms median=${fmt(pointLat.median)}ms p95=${fmt(pointLat.p95)}ms`,
      );

      const listT0 = Date.now();
      const orgDocs = await Shared.find({ orgId: 'org_50' }).lean();
      const listMs = Date.now() - listT0;
      console.log(`  List org users (${orgDocs.length} docs): ${listMs}ms`);

      const countT0 = Date.now();
      const count = await Shared.countDocuments({ orgId: 'org_50' });
      const countMs = Date.now() - countT0;
      console.log(`  Count org users: ${count} in ${countMs}ms`);

      const cat = catalogMetrics();
      console.log(
        `  Catalog: ${cat.collections} collections, ${cat.catalogIndexes} indexes, ` +
          `${cat.dataTables} data tables (shared approach = 1 extra db, minimal overhead)`,
      );

      expect(orgDocs).toHaveLength(USERS_PER);
    }, 120_000);
  });
});