mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 00:40:14 +01:00
211 lines
6.2 KiB
JavaScript
211 lines
6.2 KiB
JavaScript
const axios = require('axios');
|
|
const { Readable } = require('stream');
|
|
const { logger } = require('~/config');
|
|
const getCustomConfig = require('~/server/services/Config/getCustomConfig');
|
|
const { extractEnvVariable } = require('librechat-data-provider');
|
|
|
|
/**
 * Extract the transcription text from an STT API response.
 *
 * @param {Object} response - The response returned by the STT API request
 *
 * @returns {Promise<string>} The `data.text` value with surrounding whitespace trimmed
 *
 * @throws {Error} If the status is not 200, or if `data` / `data.text` is missing or empty
 */
async function handleResponse(response) {
  const statusCode = response.status;
  if (statusCode !== 200) {
    throw new Error('Invalid response from the STT API');
  }

  const transcript = response.data?.text;
  if (!transcript) {
    throw new Error('Missing data in response from the STT API');
  }

  return transcript.trim();
}
|
|
|
|
/**
 * Determine which STT provider the schema configures.
 *
 * @param {Object} [sttSchema] - The speech-to-text schema from the custom config
 *
 * @returns {string} `'openai'` when the schema has a truthy `openai` entry
 *
 * @throws {Error} 'Invalid provider' when no `openai` entry is present
 */
function getProvider(sttSchema) {
  const hasOpenAI = Boolean(sttSchema?.openai);
  if (!hasOpenAI) {
    throw new Error('Invalid provider');
  }

  return 'openai';
}
|
|
|
|
/**
 * Recursively strip `undefined` properties from an object, in place.
 * Nested objects (including arrays) that end up with no keys are removed as well.
 * Other falsy values such as `null`, `0`, `''` and `false` are kept.
 *
 * @param {Object} obj - The object to clean; it is mutated directly
 */
function removeUndefined(obj) {
  for (const key of Object.keys(obj)) {
    const value = obj[key];

    if (value === undefined) {
      delete obj[key];
      continue;
    }

    // `null` has typeof 'object' but must be left alone, hence the truthiness check.
    if (!value || typeof value !== 'object') {
      continue;
    }

    removeUndefined(value);
    if (Object.keys(value).length === 0) {
      delete obj[key];
    }
  }
}
|
|
|
|
/**
 * Prepare the URL, payload and headers for an OpenAI speech-to-text request.
 *
 * @param {Object} sttSchema - The speech-to-text schema; its `openai` entry supplies
 * the optional `url`, `apiKey` and `model` values
 * @param {Stream} audioReadStream - The audio data to be transcribed
 *
 * @returns {Array} `[url, data, headers]` for the request. If an error occurs while
 * preparing it (for example `sttSchema.openai` is missing), the error is logged with
 * logger and `[null, null, null]` is returned
 */
function openAIProvider(sttSchema, audioReadStream) {
  try {
    // Destructuring throws when `sttSchema.openai` is absent; the catch turns that into nulls.
    const { url: customUrl, apiKey: rawApiKey, model } = sttSchema.openai;

    const url = customUrl || 'https://api.openai.com/v1/audio/transcriptions';
    const apiKey = rawApiKey ? extractEnvVariable(rawApiKey) : '';

    const data = { file: audioReadStream };
    // Only attach `model` when it is configured so the payload never carries an `undefined` field.
    if (model !== undefined) {
      data.model = model;
    }

    const headers = { 'Content-Type': 'multipart/form-data' };
    if (apiKey) {
      headers.Authorization = `Bearer ${apiKey}`;
    }

    return [url, data, headers];
  } catch (error) {
    logger.error('An error occurred while preparing the OpenAI API STT request: ', error);
    return [null, null, null];
  }
}
|
|
|
|
/**
 * Prepare the URL, payload and headers for an Azure OpenAI speech-to-text request.
 *
 * The Azure configuration is read from `req.app.locals[req.body.endpoint]`. The first
 * entry of its `groupMap` that has a model key starting with "whisper" supplies the
 * API key, instance name, API version and deployment name.
 *
 * @param {Object} req - The request object, which should contain the endpoint in its body
 * @param {Stream} audioReadStream - The audio data to be transcribed
 *
 * @returns {Array} `[url, data, headers]` for the request. If an error occurs (no
 * configuration for the endpoint, or no complete whisper configuration), the error is
 * logged with logger and `[null, null, null]` is returned
 */
function azureProvider(req, audioReadStream) {
  try {
    const { endpoint } = req.body;
    const azureConfig = req.app.locals[endpoint];

    if (!azureConfig) {
      throw new Error(`No configuration found for endpoint: ${endpoint}`);
    }

    // Default to an empty object so that "no whisper model found" reaches the explicit
    // validation below instead of failing while destructuring `null`.
    let whisperConfig = {};
    for (const group of Object.values(azureConfig.groupMap ?? {})) {
      const models = group.models ?? {};
      const whisperKey = Object.keys(models).find((modelKey) => modelKey.startsWith('whisper'));

      if (whisperKey) {
        whisperConfig = {
          apiVersion: group.version,
          apiKey: group.apiKey,
          instanceName: group.instanceName,
          whisperModel: models[whisperKey]?.deploymentName,
        };
        break;
      }
    }

    const { apiKey, instanceName, whisperModel, apiVersion } = whisperConfig;

    if (!apiKey || !instanceName || !whisperModel || !apiVersion) {
      throw new Error('Required Azure configuration values are missing');
    }

    const baseURL = `https://${instanceName}.openai.azure.com`;
    const url = `${baseURL}/openai/deployments/${whisperModel}/audio/transcriptions?api-version=${apiVersion}`;

    // The payload is a plain object, so it has no `getHeaders()` method to spread into the headers.
    // NOTE(review): filename/contentType/knownLength look like form-data `append` options
    // rather than transcription fields, so they are no longer sent as payload fields — confirm.
    const data = { file: audioReadStream };

    const headers = {
      'Content-Type': 'multipart/form-data',
      'api-key': apiKey,
    };

    return [url, data, headers];
  } catch (error) {
    logger.error('An error occurred while preparing the Azure API STT request: ', error);
    return [null, null, null];
  }
}
|
|
|
|
/**
 * Convert speech to text.
 *
 * Reads the uploaded audio from `req.file.buffer`, builds the request for the
 * configured STT provider, posts it with axios and replies with `{ text }`.
 *
 * @param {Object} req - The request object; `req.file.buffer` must hold the uploaded audio
 * @param {Object} res - The response object
 *
 * @returns {Promise<*>} Resolves after a response has been sent: JSON `{ text }` on
 * success, 400 when no audio file was provided, 500 when the custom config is missing
 * or when preparing or performing the STT request fails (the error is logged)
 */
async function speechToText(req, res) {
  try {
    const customConfig = await getCustomConfig();
    if (!customConfig) {
      return res.status(500).send('Custom config not found');
    }

    if (!req.file || !req.file.buffer) {
      return res.status(400).json({ message: 'No audio file provided in the FormData' });
    }

    const audioReadStream = Readable.from(req.file.buffer);
    // Give the stream a file-like name for the multipart upload.
    audioReadStream.path = 'audio.wav';

    const sttSchema = customConfig.speech?.stt;
    const provider = getProvider(sttSchema);

    let url;
    let data;
    let headers;

    switch (provider) {
      case 'openai':
        [url, data, headers] = openAIProvider(sttSchema, audioReadStream);
        break;
      case 'azure':
        [url, data, headers] = azureProvider(req, audioReadStream);
        break;
      default:
        throw new Error('Invalid provider');
    }

    // Providers signal a preparation failure with [null, null, null]; do not post in that case.
    if (!url) {
      throw new Error('Failed to prepare the STT request');
    }

    const response = await axios.post(url, data, { headers });
    const text = await handleResponse(response);

    res.json({ text });
  } catch (error) {
    logger.error('An error occurred while processing the audio:', error);
    res.sendStatus(500);
  }
}
|
|
|
|
module.exports = speechToText;
|