2024-07-30 09:18:52 -04:00
|
|
|
const axios = require('axios');
|
2024-11-12 16:41:04 -05:00
|
|
|
const fs = require('fs').promises;
|
2024-08-30 21:11:15 +02:00
|
|
|
const FormData = require('form-data');
|
|
|
|
|
const { Readable } = require('stream');
|
2025-08-26 12:10:18 -04:00
|
|
|
const { logger } = require('@librechat/data-schemas');
|
2025-12-10 02:23:03 +01:00
|
|
|
const { HttpsProxyAgent } = require('https-proxy-agent');
|
2025-12-09 22:25:45 -05:00
|
|
|
const { genAzureEndpoint, logAxiosError } = require('@librechat/api');
|
2024-07-30 09:18:52 -04:00
|
|
|
const { extractEnvVariable, STTProviders } = require('librechat-data-provider');
|
2025-08-26 12:10:18 -04:00
|
|
|
const { getAppConfig } = require('~/server/services/Config');
|
2024-07-30 09:18:52 -04:00
|
|
|
|
2025-03-23 16:26:06 +01:00
|
|
|
/**
 * Maps MIME types to their corresponding file extensions for audio files.
 * Keys are lower-case MIME types without parameters; values are extensions
 * without a leading dot. The table is frozen because it is shared,
 * module-level lookup data that should never be modified at runtime.
 * @type {Readonly<Object<string, string>>}
 */
const MIME_TO_EXTENSION_MAP = Object.freeze({
  // MP4 container formats
  'audio/mp4': 'm4a',
  'audio/x-m4a': 'm4a',
  // Ogg formats
  'audio/ogg': 'ogg',
  'audio/vorbis': 'ogg',
  'application/ogg': 'ogg',
  // Wave formats
  'audio/wav': 'wav',
  'audio/x-wav': 'wav',
  'audio/wave': 'wav',
  // MP3 formats
  'audio/mp3': 'mp3',
  'audio/mpeg': 'mp3',
  'audio/mpeg3': 'mp3',
  // WebM formats
  'audio/webm': 'webm',
  // Additional formats
  'audio/flac': 'flac',
  'audio/x-flac': 'flac',
});
|
|
|
|
|
|
2025-12-09 22:25:45 -05:00
|
|
|
/**
 * Validates and extracts ISO-639-1 language code from a locale string.
 *
 * Accepted shapes (case-insensitive, surrounding whitespace ignored, `-` or `_`
 * as separator): a two-letter language code, optionally followed by a
 * four-letter script subtag and/or a region subtag (two letters or three
 * digits) — e.g. "en", "en-US", "en_US", "zh-Hans-CN", "es-419".
 * Anything else (including three-letter primary subtags such as "cmn-Hans-CN")
 * is rejected with a warning.
 *
 * @param {string} language - The language/locale string (e.g., "en-US", "en", "zh-CN")
 * @returns {string|null} The ISO-639-1 language code (e.g., "en") or null if invalid
 */
function getValidatedLanguageCode(language) {
  // Nothing supplied: silently skip the language parameter.
  if (!language) {
    return null;
  }

  if (typeof language === 'string') {
    const normalizedLanguage = language.trim().toLowerCase();
    // Group 1 is the two-letter primary language subtag.
    const match = /^([a-z]{2})(?:[-_][a-z]{4})?(?:[-_](?:[a-z]{2}|\d{3}))?$/.exec(
      normalizedLanguage,
    );
    if (match) {
      return match[1];
    }
  }

  logger.warn(
    `[STT] Invalid language format "${String(language)}". Expected ISO-639-1 locale code like "en-US" or "en". Skipping language parameter.`,
  );
  return null;
}
|
|
|
|
|
|
2025-03-23 16:26:06 +01:00
|
|
|
/**
 * Gets the file extension from the MIME type.
 *
 * Resolution order:
 *  1. exact lookup in MIME_TO_EXTENSION_MAP;
 *  2. lookup of the normalized type (parameters such as ";codecs=opus"
 *     stripped, lower-cased);
 *  3. the subtype itself when it is already a known extension
 *     ("mp4" is reported as "m4a");
 *  4. substring match of the subtype against known format names;
 *  5. "webm" as the default.
 *
 * @param {string} mimeType - The MIME type.
 * @returns {string} The file extension (without a leading dot).
 */
function getFileExtensionFromMime(mimeType) {
  const fallbackExtension = 'webm';

  // Missing or non-string input cannot be parsed; use the default.
  if (!mimeType || typeof mimeType !== 'string') {
    return fallbackExtension;
  }

  // Only accept string values so inherited members (e.g. "constructor")
  // are never returned as an extension.
  const lookup = (key) => {
    const mapped = MIME_TO_EXTENSION_MAP[key];
    return typeof mapped === 'string' ? mapped : undefined;
  };

  // Strip parameters before matching so a codec name (e.g. "mp4a" in
  // "audio/webm;codecs=mp4a.40.2") cannot be mistaken for the container.
  const essence = mimeType.split(';')[0].trim().toLowerCase();

  const direct = lookup(mimeType) ?? lookup(essence);
  if (direct) {
    return direct;
  }

  const subtype = essence.split('/')[1];
  if (!subtype) {
    return fallbackExtension;
  }

  // Subtype is itself a known extension
  if (['mp3', 'mp4', 'ogg', 'wav', 'webm', 'm4a', 'flac'].includes(subtype)) {
    return subtype === 'mp4' ? 'm4a' : subtype;
  }

  // Generic partial matches, checked in priority order
  const partialMatches = [
    [['mp4', 'm4a'], 'm4a'],
    [['ogg'], 'ogg'],
    [['wav'], 'wav'],
    [['mp3', 'mpeg'], 'mp3'],
    [['webm'], 'webm'],
    [['flac'], 'flac'],
  ];
  const found = partialMatches.find(([needles]) =>
    needles.some((needle) => subtype.includes(needle)),
  );

  return found ? found[1] : fallbackExtension;
}
|
|
|
|
|
|
2024-07-30 09:18:52 -04:00
|
|
|
/**
 * Service class for handling Speech-to-Text (STT) operations.
 *
 * Resolves the configured provider from the app config, builds the
 * provider-specific HTTP request, posts it with axios and returns the
 * transcribed text.
 * @class
 */
class STTService {
  constructor() {
    // Strategies are stored unbound; `sttRequest` invokes them with
    // `.call(this, ...)` so they can use instance helpers.
    this.providerStrategies = {
      [STTProviders.OPENAI]: this.openAIProvider,
      [STTProviders.AZURE_OPENAI]: this.azureOpenAIProvider,
    };
  }

  /**
   * Creates an STTService instance. A new instance is constructed on every call.
   * @static
   * @async
   * @returns {Promise<STTService>} The STTService instance.
   */
  static async getInstance() {
    return new STTService();
  }

  /**
   * Retrieves the configured STT provider and its schema.
   * Uses `req.config` when present, otherwise loads the app config for the
   * requesting user's role and tenant.
   * @param {ServerRequest} req - The request object.
   * @returns {Promise<[string, Object]>} A promise that resolves to an array containing the provider name and its schema.
   * @throws {Error} If no STT schema is set, multiple providers are set, or no provider is set.
   */
  async getProviderSchema(req) {
    const appConfig =
      req?.config ??
      (await getAppConfig({
        role: req?.user?.role,
        tenantId: req?.user?.tenantId,
      }));
    const sttSchema = appConfig?.speech?.stt;
    if (!sttSchema) {
      throw new Error(
        'No STT schema is set. Did you configure STT in the custom config (librechat.yaml)?',
      );
    }

    // A provider counts as configured only when its entry is a non-empty
    // object; null/empty entries are ignored instead of crashing Object.keys.
    const providers = Object.entries(sttSchema).filter(
      ([, value]) => value != null && typeof value === 'object' && Object.keys(value).length > 0,
    );

    if (providers.length !== 1) {
      throw new Error(
        providers.length > 1
          ? 'Multiple providers are set. Please set only one provider.'
          : 'No provider is set. Please set a provider.',
      );
    }

    const [provider, schema] = providers[0];
    return [provider, schema];
  }

  /**
   * Recursively removes undefined properties from an object, in place.
   * Nested objects that end up with no own enumerable keys are removed as well.
   * @param {Object} obj - The object to clean.
   * @returns {void}
   */
  removeUndefined(obj) {
    Object.keys(obj).forEach((key) => {
      if (obj[key] && typeof obj[key] === 'object') {
        this.removeUndefined(obj[key]);
        if (Object.keys(obj[key]).length === 0) {
          delete obj[key];
        }
      } else if (obj[key] === undefined) {
        delete obj[key];
      }
    });
  }

  /**
   * Prepares the request for the OpenAI STT provider.
   * @param {Object} sttSchema - The STT schema for OpenAI.
   * @param {Stream} audioReadStream - The audio data to be transcribed.
   * @param {Object} audioFile - The audio file object (unused in OpenAI provider).
   * @param {string} language - The language code for the transcription.
   * @returns {Array} An array containing the URL, data, and headers for the request.
   */
  openAIProvider(sttSchema, audioReadStream, audioFile, language) {
    const url = sttSchema?.url || 'https://api.openai.com/v1/audio/transcriptions';
    const apiKey = extractEnvVariable(sttSchema.apiKey) || '';

    const data = {
      file: audioReadStream,
      model: sttSchema.model,
    };

    // Only send a language when it validates; otherwise the field is omitted.
    const validLanguage = getValidatedLanguageCode(language);
    if (validLanguage) {
      data.language = validLanguage;
    }

    const headers = {
      'Content-Type': 'multipart/form-data',
      ...(apiKey && { Authorization: `Bearer ${apiKey}` }),
    };

    // Call as a method so the recursive `this.removeUndefined` stays bound.
    this.removeUndefined(headers);

    return [url, data, headers];
  }

  /**
   * Prepares the request for the Azure OpenAI STT provider.
   * @param {Object} sttSchema - The STT schema for Azure OpenAI.
   * @param {Buffer|Stream} audioBuffer - The audio data to be transcribed
   *   (`sttRequest` passes a Readable stream).
   * @param {Object} audioFile - The audio file object containing originalname, mimetype, and size.
   * @param {string} language - The language code for the transcription.
   * @returns {Array} An array containing the URL, data, and headers for the request.
   * @throws {Error} If the audio file size exceeds 25MB or the audio file format is not accepted.
   */
  azureOpenAIProvider(sttSchema, audioBuffer, audioFile, language) {
    const url = `${genAzureEndpoint({
      azureOpenAIApiInstanceName: extractEnvVariable(sttSchema?.instanceName),
      azureOpenAIApiDeploymentName: extractEnvVariable(sttSchema?.deploymentName),
    })}/audio/transcriptions?api-version=${extractEnvVariable(sttSchema?.apiVersion)}`;

    const apiKey = sttSchema.apiKey ? extractEnvVariable(sttSchema.apiKey) : '';

    // Streams have no `byteLength`, so fall back to the uploaded file's
    // reported size; otherwise the limit would never be enforced.
    const maxBytes = 25 * 1024 * 1024;
    const audioSize = audioBuffer?.byteLength ?? audioFile?.size;
    if (audioSize > maxBytes) {
      throw new Error('The audio file size exceeds the limit of 25MB');
    }

    const acceptedFormats = ['flac', 'mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'ogg', 'wav', 'webm'];
    // Normalize the subtype: drop parameters (";codecs=opus"), lower-case,
    // and drop a non-standard "x-" prefix ("x-m4a" -> "m4a").
    const fileFormat = String(audioFile?.mimetype ?? '')
      .split(';')[0]
      .split('/')[1]
      ?.trim()
      .toLowerCase()
      .replace(/^x-/, '');
    if (!acceptedFormats.includes(fileFormat)) {
      throw new Error(`The audio file format ${fileFormat} is not accepted`);
    }

    const formData = new FormData();
    formData.append('file', audioBuffer, {
      filename: audioFile.originalname,
      contentType: audioFile.mimetype,
    });

    const validLanguage = getValidatedLanguageCode(language);
    if (validLanguage) {
      formData.append('language', validLanguage);
    }

    const headers = {
      ...(apiKey && { 'api-key': apiKey }),
    };

    // Call as a method so the recursive `this.removeUndefined` stays bound.
    this.removeUndefined(headers);

    return [url, formData, { ...headers, ...formData.getHeaders() }];
  }

  /**
   * Sends an STT request to the specified provider.
   * @async
   * @param {string} provider - The STT provider to use.
   * @param {Object} sttSchema - The STT schema for the provider.
   * @param {Object} requestData - The data required for the STT request.
   * @param {Buffer} requestData.audioBuffer - The audio data to be transcribed.
   * @param {Object} requestData.audioFile - The audio file object containing originalname, mimetype, and size.
   * @param {string} requestData.language - The language code for the transcription.
   * @returns {Promise<string>} A promise that resolves to the transcribed text.
   * @throws {Error} If the provider is invalid, the response status is not 200, or the response data is missing.
   */
  async sttRequest(provider, sttSchema, { audioBuffer, audioFile, language }) {
    const strategy = this.providerStrategies[provider];
    if (!strategy) {
      throw new Error('Invalid provider');
    }

    const fileExtension = getFileExtensionFromMime(audioFile.mimetype);

    // Wrap the buffer in a stream and give it a `path` carrying the
    // extension derived from the upload's MIME type.
    const audioReadStream = Readable.from(audioBuffer);
    audioReadStream.path = `audio.${fileExtension}`;

    const [url, data, headers] = strategy.call(
      this,
      sttSchema,
      audioReadStream,
      audioFile,
      language,
    );

    const options = { headers };

    if (process.env.PROXY) {
      options.httpsAgent = new HttpsProxyAgent(process.env.PROXY);
    }

    try {
      const response = await axios.post(url, data, options);

      if (response.status !== 200) {
        throw new Error('Invalid response from the STT API');
      }

      if (!response.data || !response.data.text) {
        throw new Error('Missing data in response from the STT API');
      }

      return response.data.text.trim();
    } catch (error) {
      logAxiosError({ message: `STT request failed for provider ${provider}:`, error });
      throw error;
    }
  }

  /**
   * Processes a speech-to-text request.
   * Responds with `{ text }` on success, 400 when no file was uploaded and
   * 500 on any processing error. The uploaded temp file is always removed.
   * @async
   * @param {Object} req - The request object.
   * @param {Object} res - The response object.
   * @returns {Promise<void>}
   */
  async processSpeechToText(req, res) {
    if (!req.file) {
      return res.status(400).json({ message: 'No audio file provided in the FormData' });
    }

    const audioFile = {
      originalname: req.file.originalname,
      mimetype: req.file.mimetype,
      size: req.file.size,
    };

    try {
      // Read inside the try so a failed read still yields a 500 response
      // and still reaches the temp-file cleanup below.
      const audioBuffer = await fs.readFile(req.file.path);
      const [provider, sttSchema] = await this.getProviderSchema(req);
      const language = req.body?.language || '';
      const text = await this.sttRequest(provider, sttSchema, { audioBuffer, audioFile, language });
      res.json({ text });
    } catch (error) {
      logAxiosError({ message: 'An error occurred while processing the audio:', error });
      res.sendStatus(500);
    } finally {
      // Best-effort cleanup: a failed unlink is only logged.
      try {
        await fs.unlink(req.file.path);
        logger.debug('[/speech/stt] Temp. audio upload file deleted');
      } catch {
        logger.debug('[/speech/stt] Temp. audio upload file already deleted');
      }
    }
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds an STTService by delegating to `STTService.getInstance()`.
 * @async
 * @returns {Promise<STTService>} Resolves with the STTService instance.
 */
async function createSTTService() {
  const service = await STTService.getInstance();
  return service;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Request handler for speech-to-text: obtains a service from
 * `createSTTService()` and lets it process the request/response pair.
 * @async
 * @param {Object} req - The request object.
 * @param {Object} res - The response object.
 * @returns {Promise<void>}
 */
async function speechToText(req, res) {
  const service = await createSTTService();
  await service.processSpeechToText(req, res);
}
|
|
|
|
|
|
2025-08-27 03:44:39 -04:00
|
|
|
module.exports = { STTService, speechToText };
|