LibreChat/packages/api/src/cache/cacheConfig.ts
Danny Avila 5e3b7bcde3
🌊 refactor: Local Snapshot for Aggregate Key Cache to Avoid Redundant Redis GETs (#12422)
* perf: Add local snapshot to aggregate key cache to avoid redundant Redis GETs

getAll() was being called 20+ times per chat request (once per tool,
per server config lookup, per connection check). Each call hit Redis
even though the data doesn't change within a request cycle.

Add an in-memory snapshot with 5s TTL that collapses all reads within
the window into a single Redis GET. Writes (add/update/remove/reset)
invalidate the snapshot immediately so mutations are never stale.

Also removes the debug logger that was producing noisy per-call logs.

* fix: Prevent snapshot mutation and guarantee cleanup on write failure

- Never mutate the snapshot object in-place during writes. Build a new
  object (spread) so concurrent readers never observe uncommitted state.
- Move invalidateLocalSnapshot() into withWriteLock's finally block so
  cleanup is guaranteed even when successCheck throws on Redis failure.
- After successful writes, populate the snapshot with the committed state
  to avoid an unnecessary Redis GET on the next read.
- Use Date.now() after the await in getAll() so the TTL window isn't
  shortened by Redis latency.
- Strengthen tests: spy on underlying Keyv cache to verify N getAll()
  calls collapse into 1 Redis GET, verify snapshot reference immutability.

* fix: Remove dead populateLocalSnapshot calls from write callbacks

populateLocalSnapshot was called inside withWriteLock callbacks, but
the finally block in withWriteLock always calls invalidateLocalSnapshot
immediately after — undoing the populate on every execution path.

Remove the dead method and its three call sites. The snapshot is
correctly cleared by finally on both success and failure paths. The
next getAll() after a write hits Redis once to fetch the committed
state, which is acceptable since writes only occur during init and
rare manual reinspection.

* fix: Derive local snapshot TTL from MCP_REGISTRY_CACHE_TTL config

Use cacheConfig.MCP_REGISTRY_CACHE_TTL (default 5000ms) instead of a
hardcoded 5s constant. When TTL is 0 (operator explicitly wants no
caching), the snapshot is disabled entirely — every getAll() hits Redis.

* fix: Add TTL expiry test, document 2×TTL staleness, clarify comments

- Add missing test for snapshot TTL expiry path (force-expire via
  localSnapshotExpiry mutation, verify Redis is hit again)
- Document 2×TTL max cross-instance staleness in localSnapshot JSDoc
- Document reset() intentionally bypasses withWriteLock
- Add inline comments explaining why early invalidateLocalSnapshot()
  in write callbacks is distinct from the finally-block cleanup
- Update cacheConfig.MCP_REGISTRY_CACHE_TTL JSDoc to reflect both
  use sites and the staleness implication
- Rename misleading test name for snapshot reference immutability
- Add epoch sentinel comment on localSnapshotExpiry initialization
2026-03-26 16:39:09 -04:00

143 lines
6.4 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { readFileSync, existsSync } from 'fs';
import { logger } from '@librechat/data-schemas';
import { CacheKeys } from 'librechat-data-provider';
import { math, isEnabled } from '~/utils';
// Deployment isolation: cache keys receive a per-deployment prefix so that
// multiple deployments sharing one Redis instance never collide. The prefix is
// either taken verbatim from REDIS_KEY_PREFIX, or read from the env var *named
// by* REDIS_KEY_PREFIX_VAR (commonly the deployment/pod ID injected at runtime).
const REDIS_KEY_PREFIX_VAR = process.env.REDIS_KEY_PREFIX_VAR;
const REDIS_KEY_PREFIX = process.env.REDIS_KEY_PREFIX;

// The two prefix mechanisms are mutually exclusive — refuse to start otherwise.
if (REDIS_KEY_PREFIX && REDIS_KEY_PREFIX_VAR) {
  throw new Error('Only either REDIS_KEY_PREFIX_VAR or REDIS_KEY_PREFIX can be set.');
}

/** Whether Redis-backed caching is enabled for this process. */
const USE_REDIS = isEnabled(process.env.USE_REDIS);

// Redis without a connection string is a misconfiguration — fail fast at startup.
if (USE_REDIS && !process.env.REDIS_URI) {
  throw new Error('USE_REDIS is enabled but REDIS_URI is not set.');
}
// USE_REDIS_STREAMS controls whether Redis is used for resumable stream job storage.
// When the variable is absent it inherits USE_REDIS; set it to 'false' to keep
// streams in-memory while other caches remain on Redis.
const USE_REDIS_STREAMS =
  process.env.USE_REDIS_STREAMS === undefined
    ? USE_REDIS
    : isEnabled(process.env.USE_REDIS_STREAMS);

// Comma-separated list of cache namespaces that must stay in-memory even when
// Redis is enabled — a selective performance escape hatch. When unset, defaults
// to CONFIG_STORE and APP_CONFIG so YAML-derived config stays per-container.
// An explicit empty string routes every namespace through Redis.
const FORCED_IN_MEMORY_CACHE_NAMESPACES =
  process.env.FORCED_IN_MEMORY_CACHE_NAMESPACES === undefined
    ? [CacheKeys.CONFIG_STORE, CacheKeys.APP_CONFIG]
    : process.env.FORCED_IN_MEMORY_CACHE_NAMESPACES.split(',')
        .map((entry) => entry.trim())
        .filter(Boolean);

// Reject any namespace that is not a known CacheKeys member.
if (FORCED_IN_MEMORY_CACHE_NAMESPACES.length > 0) {
  const validKeys = Object.values(CacheKeys) as string[];
  const known = new Set(validKeys);
  const invalidKeys = FORCED_IN_MEMORY_CACHE_NAMESPACES.filter((key) => !known.has(key));
  if (invalidKeys.length > 0) {
    throw new Error(
      `Invalid cache keys in FORCED_IN_MEMORY_CACHE_NAMESPACES: ${invalidKeys.join(', ')}. Valid keys: ${validKeys.join(', ')}`,
    );
  }
}
/**
 * Safely reads the Redis CA certificate from the file named by REDIS_CA.
 * Never throws: missing configuration, a missing file, or a read error all
 * resolve to null (with a warning/error logged for the latter two).
 * @returns The PEM contents of the CA certificate file, or null.
 */
const getRedisCA = (): string | null => {
  const caPath = process.env.REDIS_CA;
  if (!caPath) {
    return null;
  }
  try {
    if (!existsSync(caPath)) {
      logger.warn(`Redis CA certificate file not found: ${caPath}`);
      return null;
    }
    return readFileSync(caPath, 'utf8');
  } catch (error) {
    logger.error(`Failed to read Redis CA certificate file '${caPath}':`, error);
    return null;
  }
};
/**
 * Centralized cache/Redis configuration, resolved once from environment
 * variables at module load and consumed by the cache layer. All validation
 * (prefix exclusivity, REDIS_URI presence, namespace names) happens above,
 * before this object is built.
 */
const cacheConfig = {
  /** Cache namespaces forced to in-memory storage even when Redis is enabled (validated above). */
  FORCED_IN_MEMORY_CACHE_NAMESPACES,
  USE_REDIS,
  USE_REDIS_STREAMS,
  /** Redis connection string; guaranteed present when USE_REDIS is enabled (checked above). */
  REDIS_URI: process.env.REDIS_URI,
  REDIS_USERNAME: process.env.REDIS_USERNAME,
  REDIS_PASSWORD: process.env.REDIS_PASSWORD,
  /** PEM contents of the CA certificate file, or null if unset or unreadable. */
  REDIS_CA: getRedisCA(),
  /**
   * Per-deployment key prefix: resolved from the env var *named by*
   * REDIS_KEY_PREFIX_VAR when that is set, else REDIS_KEY_PREFIX, else ''.
   * (The two sources are mutually exclusive — enforced above.)
   */
  REDIS_KEY_PREFIX: process.env[REDIS_KEY_PREFIX_VAR ?? ''] || REDIS_KEY_PREFIX || '',
  /** Separator placed between the global key prefix and the cache key. */
  GLOBAL_PREFIX_SEPARATOR: '::',
  /** Max event listeners on the Redis client. @default 40 */
  REDIS_MAX_LISTENERS: math(process.env.REDIS_MAX_LISTENERS, 40),
  /** Ping interval in ms. @default 0 — NOTE(review): presumably 0 disables periodic pings; confirm in the client setup. */
  REDIS_PING_INTERVAL: math(process.env.REDIS_PING_INTERVAL, 0),
  /** Max delay between reconnection attempts in ms */
  REDIS_RETRY_MAX_DELAY: math(process.env.REDIS_RETRY_MAX_DELAY, 3000),
  /** Max number of reconnection attempts (0 = infinite) */
  REDIS_RETRY_MAX_ATTEMPTS: math(process.env.REDIS_RETRY_MAX_ATTEMPTS, 10),
  /** Connection timeout in ms */
  REDIS_CONNECT_TIMEOUT: math(process.env.REDIS_CONNECT_TIMEOUT, 10000),
  /** Queue commands when disconnected */
  REDIS_ENABLE_OFFLINE_QUEUE: isEnabled(process.env.REDIS_ENABLE_OFFLINE_QUEUE ?? 'true'),
  /** flag to modify redis connection by adding dnsLookup this is required when connecting to elasticache for ioredis
   * see "Special Note: Aws Elasticache Clusters with TLS" on this webpage: https://www.npmjs.com/package/ioredis **/
  REDIS_USE_ALTERNATIVE_DNS_LOOKUP: isEnabled(process.env.REDIS_USE_ALTERNATIVE_DNS_LOOKUP),
  /** Enable redis cluster without the need of multiple URIs */
  USE_REDIS_CLUSTER: isEnabled(process.env.USE_REDIS_CLUSTER ?? 'false'),
  /** True when running under CI. */
  CI: isEnabled(process.env.CI),
  /** Enables memory-cache debug instrumentation. */
  DEBUG_MEMORY_CACHE: isEnabled(process.env.DEBUG_MEMORY_CACHE),
  BAN_DURATION: math(process.env.BAN_DURATION, 7200000), // 2 hours
  /**
   * Number of keys to delete in each batch during Redis DEL operations.
   * In cluster mode, keys are deleted individually in parallel chunks to avoid CROSSSLOT errors.
   * In single-node mode, keys are deleted in batches using DEL with arrays.
   * Lower values reduce memory usage but increase number of Redis calls.
   * @default 1000
   */
  REDIS_DELETE_CHUNK_SIZE: math(process.env.REDIS_DELETE_CHUNK_SIZE, 1000),
  /**
   * Number of keys to update in each batch during Redis SET operations.
   * In cluster mode, keys are updated individually in parallel chunks to avoid CROSSSLOT errors.
   * In single-node mode, keys are updated in batches using transactions (multi/exec).
   * Lower values reduce memory usage but increase number of Redis calls.
   * @default 1000
   */
  REDIS_UPDATE_CHUNK_SIZE: math(process.env.REDIS_UPDATE_CHUNK_SIZE, 1000),
  /**
   * COUNT hint for Redis SCAN operations when scanning keys by pattern.
   * This is a hint to Redis about how many keys to scan in each iteration.
   * Higher values can reduce round trips but increase memory usage and latency per call.
   * Note: Redis may return more or fewer keys than this count depending on internal heuristics.
   * @default 1000
   */
  REDIS_SCAN_COUNT: math(process.env.REDIS_SCAN_COUNT, 1000),
  /**
   * TTL in milliseconds for MCP registry caches. Used by both:
   * - `MCPServersRegistry` read-through caches (`readThroughCache`/`readThroughCacheAll`)
   * - `ServerConfigsCacheRedisAggregateKey` local snapshot (avoids redundant Redis GETs)
   *
   * Both layers use this value, so the effective max cross-instance staleness is up
   * to 2× this value in multi-instance deployments. Set to 0 to disable the local
   * snapshot entirely (every `getAll()` hits Redis directly).
   * @default 5000 (5 seconds)
   */
  MCP_REGISTRY_CACHE_TTL: math(process.env.MCP_REGISTRY_CACHE_TTL, 5000),
};
export { cacheConfig };