LibreChat/api/server/utils/import/importers.js

342 lines
12 KiB
JavaScript
Raw Normal View History

📥 feat: Import Conversations from LibreChat, ChatGPT, Chatbot UI (#2355) * Basic implementation of ChatGPT conversation import * remove debug code * Handle citations * Fix updatedAt in import * update default model * Use job scheduler to handle import requests * import job status endpoint * Add wrapper around Agenda * Rate limits for import endpoint * rename import api path * Batch save import to mongo * Improve naming * Add documenting comments * Test for importers * Change button for importing conversations * Frontend changes * Import job status endpoint * Import endpoint response * Add translations to new phrases * Fix conversations refreshing * cleanup unused functions * set timeout for import job status polling * Add documentation * get extra spaces back * Improve error message * Fix translation files after merge * fix translation files 2 * Add zh translation for import functionality * Sync mailisearch index after import * chore: add dummy uri for jest tests, as MONGO_URI should only be real for E2E tests * docs: fix links * docs: fix conversationsImport section * fix: user role issue for librechat imports * refactor: import conversations from json - organize imports - add additional jsdocs - use multer with diskStorage to avoid loading file into memory outside of job - use filepath instead of loading data string for imports - replace console logs and some logger.info() with logger.debug - only use multer for import route * fix: undefined metadata edge case and replace ChatGtp -> ChatGpt * Refactor importChatGptConvo function to handle undefined metadata edge case and replace ChatGtp with ChatGpt * fix: chatgpt importer * feat: maintain tree relationship for librechat messages * chore: use enum * refactor: saveMessage to use single object arg, replace console logs, add userId to log message * chore: additional comment * chore: multer edge case * feat: first pass, maintain tree relationship * chore: organize * chore: remove log * ci: add heirarchy test for chatgpt * ci: test maintaining of heirarchy for librechat * wip: allow non-text content type messages * refactor: import content part object json string * refactor: more content types to format * chore: consolidate messageText formatting * docs: update on changes, bump data-provider/config versions, update readme * refactor(indexSync): singleton pattern for MeiliSearchClient * refactor: debug log after batch is done * chore: add back indexSync error handling --------- Co-authored-by: jakubmieszczak <jakub.mieszczak@zendesk.com> Co-authored-by: Danny Avila <danny@librechat.ai>
2024-05-02 08:48:26 +02:00
const { v4: uuidv4 } = require('uuid');
const { EModelEndpoint, Constants, openAISettings, CacheKeys } = require('librechat-data-provider');
📥 feat: Import Conversations from LibreChat, ChatGPT, Chatbot UI (#2355) * Basic implementation of ChatGPT conversation import * remove debug code * Handle citations * Fix updatedAt in import * update default model * Use job scheduler to handle import requests * import job status endpoint * Add wrapper around Agenda * Rate limits for import endpoint * rename import api path * Batch save import to mongo * Improve naming * Add documenting comments * Test for importers * Change button for importing conversations * Frontend changes * Import job status endpoint * Import endpoint response * Add translations to new phrases * Fix conversations refreshing * cleanup unused functions * set timeout for import job status polling * Add documentation * get extra spaces back * Improve error message * Fix translation files after merge * fix translation files 2 * Add zh translation for import functionality * Sync mailisearch index after import * chore: add dummy uri for jest tests, as MONGO_URI should only be real for E2E tests * docs: fix links * docs: fix conversationsImport section * fix: user role issue for librechat imports * refactor: import conversations from json - organize imports - add additional jsdocs - use multer with diskStorage to avoid loading file into memory outside of job - use filepath instead of loading data string for imports - replace console logs and some logger.info() with logger.debug - only use multer for import route * fix: undefined metadata edge case and replace ChatGtp -> ChatGpt * Refactor importChatGptConvo function to handle undefined metadata edge case and replace ChatGtp with ChatGpt * fix: chatgpt importer * feat: maintain tree relationship for librechat messages * chore: use enum * refactor: saveMessage to use single object arg, replace console logs, add userId to log message * chore: additional comment * chore: multer edge case * feat: first pass, maintain tree relationship * chore: organize * chore: remove log * ci: add heirarchy test for chatgpt * ci: test maintaining of heirarchy for librechat * wip: allow non-text content type messages * refactor: import content part object json string * refactor: more content types to format * chore: consolidate messageText formatting * docs: update on changes, bump data-provider/config versions, update readme * refactor(indexSync): singleton pattern for MeiliSearchClient * refactor: debug log after batch is done * chore: add back indexSync error handling --------- Co-authored-by: jakubmieszczak <jakub.mieszczak@zendesk.com> Co-authored-by: Danny Avila <danny@librechat.ai>
2024-05-02 08:48:26 +02:00
const { createImportBatchBuilder } = require('./importBatchBuilder');
const getLogStores = require('~/cache/getLogStores');
📥 feat: Import Conversations from LibreChat, ChatGPT, Chatbot UI (#2355) * Basic implementation of ChatGPT conversation import * remove debug code * Handle citations * Fix updatedAt in import * update default model * Use job scheduler to handle import requests * import job status endpoint * Add wrapper around Agenda * Rate limits for import endpoint * rename import api path * Batch save import to mongo * Improve naming * Add documenting comments * Test for importers * Change button for importing conversations * Frontend changes * Import job status endpoint * Import endpoint response * Add translations to new phrases * Fix conversations refreshing * cleanup unused functions * set timeout for import job status polling * Add documentation * get extra spaces back * Improve error message * Fix translation files after merge * fix translation files 2 * Add zh translation for import functionality * Sync mailisearch index after import * chore: add dummy uri for jest tests, as MONGO_URI should only be real for E2E tests * docs: fix links * docs: fix conversationsImport section * fix: user role issue for librechat imports * refactor: import conversations from json - organize imports - add additional jsdocs - use multer with diskStorage to avoid loading file into memory outside of job - use filepath instead of loading data string for imports - replace console logs and some logger.info() with logger.debug - only use multer for import route * fix: undefined metadata edge case and replace ChatGtp -> ChatGpt * Refactor importChatGptConvo function to handle undefined metadata edge case and replace ChatGtp with ChatGpt * fix: chatgpt importer * feat: maintain tree relationship for librechat messages * chore: use enum * refactor: saveMessage to use single object arg, replace console logs, add userId to log message * chore: additional comment * chore: multer edge case * feat: first pass, maintain tree relationship * chore: organize * chore: remove log * ci: add heirarchy test for chatgpt * ci: test maintaining of heirarchy for librechat * wip: allow non-text content type messages * refactor: import content part object json string * refactor: more content types to format * chore: consolidate messageText formatting * docs: update on changes, bump data-provider/config versions, update readme * refactor(indexSync): singleton pattern for MeiliSearchClient * refactor: debug log after batch is done * chore: add back indexSync error handling --------- Co-authored-by: jakubmieszczak <jakub.mieszczak@zendesk.com> Co-authored-by: Danny Avila <danny@librechat.ai>
2024-05-02 08:48:26 +02:00
const logger = require('~/config/winston');
/**
* Returns the appropriate importer function based on the provided JSON data.
*
* @param {Object} jsonData - The JSON data to import.
* @returns {Function} - The importer function.
* @throws {Error} - If the import type is not supported.
*/
function getImporter(jsonData) {
// For ChatGPT
if (Array.isArray(jsonData)) {
logger.info('Importing ChatGPT conversation');
return importChatGptConvo;
}
// For ChatbotUI
if (jsonData.version && Array.isArray(jsonData.history)) {
logger.info('Importing ChatbotUI conversation');
return importChatBotUiConvo;
}
// For LibreChat
if (jsonData.conversationId && (jsonData.messagesTree || jsonData.messages)) {
📥 feat: Import Conversations from LibreChat, ChatGPT, Chatbot UI (#2355) * Basic implementation of ChatGPT conversation import * remove debug code * Handle citations * Fix updatedAt in import * update default model * Use job scheduler to handle import requests * import job status endpoint * Add wrapper around Agenda * Rate limits for import endpoint * rename import api path * Batch save import to mongo * Improve naming * Add documenting comments * Test for importers * Change button for importing conversations * Frontend changes * Import job status endpoint * Import endpoint response * Add translations to new phrases * Fix conversations refreshing * cleanup unused functions * set timeout for import job status polling * Add documentation * get extra spaces back * Improve error message * Fix translation files after merge * fix translation files 2 * Add zh translation for import functionality * Sync mailisearch index after import * chore: add dummy uri for jest tests, as MONGO_URI should only be real for E2E tests * docs: fix links * docs: fix conversationsImport section * fix: user role issue for librechat imports * refactor: import conversations from json - organize imports - add additional jsdocs - use multer with diskStorage to avoid loading file into memory outside of job - use filepath instead of loading data string for imports - replace console logs and some logger.info() with logger.debug - only use multer for import route * fix: undefined metadata edge case and replace ChatGtp -> ChatGpt * Refactor importChatGptConvo function to handle undefined metadata edge case and replace ChatGtp with ChatGpt * fix: chatgpt importer * feat: maintain tree relationship for librechat messages * chore: use enum * refactor: saveMessage to use single object arg, replace console logs, add userId to log message * chore: additional comment * chore: multer edge case * feat: first pass, maintain tree relationship * chore: organize * chore: remove log * ci: add heirarchy test for chatgpt * ci: test maintaining of heirarchy for librechat * wip: allow non-text content type messages * refactor: import content part object json string * refactor: more content types to format * chore: consolidate messageText formatting * docs: update on changes, bump data-provider/config versions, update readme * refactor(indexSync): singleton pattern for MeiliSearchClient * refactor: debug log after batch is done * chore: add back indexSync error handling --------- Co-authored-by: jakubmieszczak <jakub.mieszczak@zendesk.com> Co-authored-by: Danny Avila <danny@librechat.ai>
2024-05-02 08:48:26 +02:00
logger.info('Importing LibreChat conversation');
return importLibreChatConvo;
}
throw new Error('Unsupported import type');
}
/**
* Imports a chatbot-ui V1 conversation from a JSON file and saves it to the database.
*
* @param {Object} jsonData - The JSON data containing the chatbot conversation.
* @param {string} requestUserId - The ID of the user making the import request.
* @param {Function} [builderFactory=createImportBatchBuilder] - The factory function to create an import batch builder.
* @returns {Promise<void>} - A promise that resolves when the import is complete.
* @throws {Error} - If there is an error creating the conversation from the JSON file.
*/
async function importChatBotUiConvo(
jsonData,
requestUserId,
builderFactory = createImportBatchBuilder,
) {
// this have been tested with chatbot-ui V1 export https://github.com/mckaywrigley/chatbot-ui/tree/b865b0555f53957e96727bc0bbb369c9eaecd83b#legacy-code
try {
🌿 feat: Fork Messages/Conversations (#2617) * typedef for ImportBatchBuilder * feat: first pass, fork conversations * feat: fork - getMessagesUpToTargetLevel * fix: additional tests and fix getAllMessagesUpToParent * chore: arrow function return * refactor: fork 3 options * chore: remove unused genbuttons * chore: remove unused hover buttons code * feat: fork first pass * wip: fork remember setting * style: user icon * chore: move clear chats to data tab * WIP: fork UI options * feat: data-provider fork types/services/vars and use generic MutationOptions * refactor: use single param for fork option, use enum, fix mongo errors, use Date.now(), add records flag for testing, use endpoint from original convo and messages, pass originalConvo to finishConversation * feat: add fork mutation hook and consolidate type imports * refactor: use enum * feat: first pass, fork mutation * chore: add enum for target level fork option * chore: add enum for target level fork option * show toast when checking remember selection * feat: splitAtTarget * feat: split at target option * feat: navigate to new fork, show toasts, set result query data * feat: hover info for all fork options * refactor: add Messages settings tab * fix(Fork): remember text info * ci: test for single message and is target edge case * feat: additional tests for getAllMessagesUpToParent * ci: additional tests and cycle detection for getMessagesUpToTargetLevel * feat: circular dependency checks for getAllMessagesUpToParent * fix: getMessagesUpToTargetLevel circular dep. check * ci: more tests for getMessagesForConversation * style: hover text for checkbox fork items * refactor: add statefulness to conversation import
2024-05-05 11:48:20 -04:00
/** @type {ImportBatchBuilder} */
📥 feat: Import Conversations from LibreChat, ChatGPT, Chatbot UI (#2355) * Basic implementation of ChatGPT conversation import * remove debug code * Handle citations * Fix updatedAt in import * update default model * Use job scheduler to handle import requests * import job status endpoint * Add wrapper around Agenda * Rate limits for import endpoint * rename import api path * Batch save import to mongo * Improve naming * Add documenting comments * Test for importers * Change button for importing conversations * Frontend changes * Import job status endpoint * Import endpoint response * Add translations to new phrases * Fix conversations refreshing * cleanup unused functions * set timeout for import job status polling * Add documentation * get extra spaces back * Improve error message * Fix translation files after merge * fix translation files 2 * Add zh translation for import functionality * Sync mailisearch index after import * chore: add dummy uri for jest tests, as MONGO_URI should only be real for E2E tests * docs: fix links * docs: fix conversationsImport section * fix: user role issue for librechat imports * refactor: import conversations from json - organize imports - add additional jsdocs - use multer with diskStorage to avoid loading file into memory outside of job - use filepath instead of loading data string for imports - replace console logs and some logger.info() with logger.debug - only use multer for import route * fix: undefined metadata edge case and replace ChatGtp -> ChatGpt * Refactor importChatGptConvo function to handle undefined metadata edge case and replace ChatGtp with ChatGpt * fix: chatgpt importer * feat: maintain tree relationship for librechat messages * chore: use enum * refactor: saveMessage to use single object arg, replace console logs, add userId to log message * chore: additional comment * chore: multer edge case * feat: first pass, maintain tree relationship * chore: organize * chore: remove log * ci: add heirarchy test for chatgpt * ci: test maintaining of heirarchy for librechat * wip: allow non-text content type messages * refactor: import content part object json string * refactor: more content types to format * chore: consolidate messageText formatting * docs: update on changes, bump data-provider/config versions, update readme * refactor(indexSync): singleton pattern for MeiliSearchClient * refactor: debug log after batch is done * chore: add back indexSync error handling --------- Co-authored-by: jakubmieszczak <jakub.mieszczak@zendesk.com> Co-authored-by: Danny Avila <danny@librechat.ai>
2024-05-02 08:48:26 +02:00
const importBatchBuilder = builderFactory(requestUserId);
for (const historyItem of jsonData.history) {
importBatchBuilder.startConversation(EModelEndpoint.openAI);
for (const message of historyItem.messages) {
if (message.role === 'assistant') {
importBatchBuilder.addGptMessage(message.content, historyItem.model.id);
} else if (message.role === 'user') {
importBatchBuilder.addUserMessage(message.content);
}
}
importBatchBuilder.finishConversation(historyItem.name, new Date());
}
await importBatchBuilder.saveBatch();
logger.info(`user: ${requestUserId} | ChatbotUI conversation imported`);
} catch (error) {
logger.error(`user: ${requestUserId} | Error creating conversation from ChatbotUI file`, error);
}
}
/**
* Imports a LibreChat conversation from JSON.
*
* @param {Object} jsonData - The JSON data representing the conversation.
* @param {string} requestUserId - The ID of the user making the import request.
* @param {Function} [builderFactory=createImportBatchBuilder] - The factory function to create an import batch builder.
* @returns {Promise<void>} - A promise that resolves when the import is complete.
*/
async function importLibreChatConvo(
jsonData,
requestUserId,
builderFactory = createImportBatchBuilder,
) {
try {
🌿 feat: Fork Messages/Conversations (#2617) * typedef for ImportBatchBuilder * feat: first pass, fork conversations * feat: fork - getMessagesUpToTargetLevel * fix: additional tests and fix getAllMessagesUpToParent * chore: arrow function return * refactor: fork 3 options * chore: remove unused genbuttons * chore: remove unused hover buttons code * feat: fork first pass * wip: fork remember setting * style: user icon * chore: move clear chats to data tab * WIP: fork UI options * feat: data-provider fork types/services/vars and use generic MutationOptions * refactor: use single param for fork option, use enum, fix mongo errors, use Date.now(), add records flag for testing, use endpoint from original convo and messages, pass originalConvo to finishConversation * feat: add fork mutation hook and consolidate type imports * refactor: use enum * feat: first pass, fork mutation * chore: add enum for target level fork option * chore: add enum for target level fork option * show toast when checking remember selection * feat: splitAtTarget * feat: split at target option * feat: navigate to new fork, show toasts, set result query data * feat: hover info for all fork options * refactor: add Messages settings tab * fix(Fork): remember text info * ci: test for single message and is target edge case * feat: additional tests for getAllMessagesUpToParent * ci: additional tests and cycle detection for getMessagesUpToTargetLevel * feat: circular dependency checks for getAllMessagesUpToParent * fix: getMessagesUpToTargetLevel circular dep. check * ci: more tests for getMessagesForConversation * style: hover text for checkbox fork items * refactor: add statefulness to conversation import
2024-05-05 11:48:20 -04:00
/** @type {ImportBatchBuilder} */
📥 feat: Import Conversations from LibreChat, ChatGPT, Chatbot UI (#2355) * Basic implementation of ChatGPT conversation import * remove debug code * Handle citations * Fix updatedAt in import * update default model * Use job scheduler to handle import requests * import job status endpoint * Add wrapper around Agenda * Rate limits for import endpoint * rename import api path * Batch save import to mongo * Improve naming * Add documenting comments * Test for importers * Change button for importing conversations * Frontend changes * Import job status endpoint * Import endpoint response * Add translations to new phrases * Fix conversations refreshing * cleanup unused functions * set timeout for import job status polling * Add documentation * get extra spaces back * Improve error message * Fix translation files after merge * fix translation files 2 * Add zh translation for import functionality * Sync mailisearch index after import * chore: add dummy uri for jest tests, as MONGO_URI should only be real for E2E tests * docs: fix links * docs: fix conversationsImport section * fix: user role issue for librechat imports * refactor: import conversations from json - organize imports - add additional jsdocs - use multer with diskStorage to avoid loading file into memory outside of job - use filepath instead of loading data string for imports - replace console logs and some logger.info() with logger.debug - only use multer for import route * fix: undefined metadata edge case and replace ChatGtp -> ChatGpt * Refactor importChatGptConvo function to handle undefined metadata edge case and replace ChatGtp with ChatGpt * fix: chatgpt importer * feat: maintain tree relationship for librechat messages * chore: use enum * refactor: saveMessage to use single object arg, replace console logs, add userId to log message * chore: additional comment * chore: multer edge case * feat: first pass, maintain tree relationship * chore: organize * chore: remove log * ci: add heirarchy test for chatgpt * ci: test maintaining of heirarchy for librechat * wip: allow non-text content type messages * refactor: import content part object json string * refactor: more content types to format * chore: consolidate messageText formatting * docs: update on changes, bump data-provider/config versions, update readme * refactor(indexSync): singleton pattern for MeiliSearchClient * refactor: debug log after batch is done * chore: add back indexSync error handling --------- Co-authored-by: jakubmieszczak <jakub.mieszczak@zendesk.com> Co-authored-by: Danny Avila <danny@librechat.ai>
2024-05-02 08:48:26 +02:00
const importBatchBuilder = builderFactory(requestUserId);
const options = jsonData.options || {};
/* Endpoint configuration */
let endpoint = jsonData.endpoint ?? options.endpoint ?? EModelEndpoint.openAI;
const cache = getLogStores(CacheKeys.CONFIG_STORE);
const endpointsConfig = await cache.get(CacheKeys.ENDPOINT_CONFIG);
const endpointConfig = endpointsConfig?.[endpoint];
if (!endpointConfig && endpointsConfig) {
endpoint = Object.keys(endpointsConfig)[0];
} else if (!endpointConfig) {
endpoint = EModelEndpoint.openAI;
}
importBatchBuilder.startConversation(endpoint);
📥 feat: Import Conversations from LibreChat, ChatGPT, Chatbot UI (#2355) * Basic implementation of ChatGPT conversation import * remove debug code * Handle citations * Fix updatedAt in import * update default model * Use job scheduler to handle import requests * import job status endpoint * Add wrapper around Agenda * Rate limits for import endpoint * rename import api path * Batch save import to mongo * Improve naming * Add documenting comments * Test for importers * Change button for importing conversations * Frontend changes * Import job status endpoint * Import endpoint response * Add translations to new phrases * Fix conversations refreshing * cleanup unused functions * set timeout for import job status polling * Add documentation * get extra spaces back * Improve error message * Fix translation files after merge * fix translation files 2 * Add zh translation for import functionality * Sync mailisearch index after import * chore: add dummy uri for jest tests, as MONGO_URI should only be real for E2E tests * docs: fix links * docs: fix conversationsImport section * fix: user role issue for librechat imports * refactor: import conversations from json - organize imports - add additional jsdocs - use multer with diskStorage to avoid loading file into memory outside of job - use filepath instead of loading data string for imports - replace console logs and some logger.info() with logger.debug - only use multer for import route * fix: undefined metadata edge case and replace ChatGtp -> ChatGpt * Refactor importChatGptConvo function to handle undefined metadata edge case and replace ChatGtp with ChatGpt * fix: chatgpt importer * feat: maintain tree relationship for librechat messages * chore: use enum * refactor: saveMessage to use single object arg, replace console logs, add userId to log message * chore: additional comment * chore: multer edge case * feat: first pass, maintain tree relationship * chore: organize * chore: remove log * ci: add heirarchy test for chatgpt * ci: test maintaining of heirarchy for librechat * wip: allow non-text content type messages * refactor: import content part object json string * refactor: more content types to format * chore: consolidate messageText formatting * docs: update on changes, bump data-provider/config versions, update readme * refactor(indexSync): singleton pattern for MeiliSearchClient * refactor: debug log after batch is done * chore: add back indexSync error handling --------- Co-authored-by: jakubmieszczak <jakub.mieszczak@zendesk.com> Co-authored-by: Danny Avila <danny@librechat.ai>
2024-05-02 08:48:26 +02:00
let firstMessageDate = null;
const messagesToImport = jsonData.messagesTree || jsonData.messages;
if (jsonData.recursive) {
/**
* Recursively traverse the messages tree and save each message to the database.
* @param {TMessage[]} messages
* @param {string} parentMessageId
*/
const traverseMessages = async (messages, parentMessageId = null) => {
for (const message of messages) {
if (!message.text) {
continue;
}
let savedMessage;
if (message.sender?.toLowerCase() === 'user' || message.isCreatedByUser) {
savedMessage = await importBatchBuilder.saveMessage({
text: message.text,
sender: 'user',
isCreatedByUser: true,
parentMessageId: parentMessageId,
});
} else {
savedMessage = await importBatchBuilder.saveMessage({
text: message.text,
sender: message.sender,
isCreatedByUser: false,
model: options.model,
parentMessageId: parentMessageId,
});
}
📥 feat: Import Conversations from LibreChat, ChatGPT, Chatbot UI (#2355) * Basic implementation of ChatGPT conversation import * remove debug code * Handle citations * Fix updatedAt in import * update default model * Use job scheduler to handle import requests * import job status endpoint * Add wrapper around Agenda * Rate limits for import endpoint * rename import api path * Batch save import to mongo * Improve naming * Add documenting comments * Test for importers * Change button for importing conversations * Frontend changes * Import job status endpoint * Import endpoint response * Add translations to new phrases * Fix conversations refreshing * cleanup unused functions * set timeout for import job status polling * Add documentation * get extra spaces back * Improve error message * Fix translation files after merge * fix translation files 2 * Add zh translation for import functionality * Sync mailisearch index after import * chore: add dummy uri for jest tests, as MONGO_URI should only be real for E2E tests * docs: fix links * docs: fix conversationsImport section * fix: user role issue for librechat imports * refactor: import conversations from json - organize imports - add additional jsdocs - use multer with diskStorage to avoid loading file into memory outside of job - use filepath instead of loading data string for imports - replace console logs and some logger.info() with logger.debug - only use multer for import route * fix: undefined metadata edge case and replace ChatGtp -> ChatGpt * Refactor importChatGptConvo function to handle undefined metadata edge case and replace ChatGtp with ChatGpt * fix: chatgpt importer * feat: maintain tree relationship for librechat messages * chore: use enum * refactor: saveMessage to use single object arg, replace console logs, add userId to log message * chore: additional comment * chore: multer edge case * feat: first pass, maintain tree relationship * chore: organize * chore: remove log * ci: add heirarchy test for chatgpt * ci: test maintaining of heirarchy for librechat * wip: allow non-text content type messages * refactor: import content part object json string * refactor: more content types to format * chore: consolidate messageText formatting * docs: update on changes, bump data-provider/config versions, update readme * refactor(indexSync): singleton pattern for MeiliSearchClient * refactor: debug log after batch is done * chore: add back indexSync error handling --------- Co-authored-by: jakubmieszczak <jakub.mieszczak@zendesk.com> Co-authored-by: Danny Avila <danny@librechat.ai>
2024-05-02 08:48:26 +02:00
if (!firstMessageDate) {
firstMessageDate = new Date(message.createdAt);
}
if (message.children && message.children.length > 0) {
await traverseMessages(message.children, savedMessage.messageId);
}
📥 feat: Import Conversations from LibreChat, ChatGPT, Chatbot UI (#2355) * Basic implementation of ChatGPT conversation import * remove debug code * Handle citations * Fix updatedAt in import * update default model * Use job scheduler to handle import requests * import job status endpoint * Add wrapper around Agenda * Rate limits for import endpoint * rename import api path * Batch save import to mongo * Improve naming * Add documenting comments * Test for importers * Change button for importing conversations * Frontend changes * Import job status endpoint * Import endpoint response * Add translations to new phrases * Fix conversations refreshing * cleanup unused functions * set timeout for import job status polling * Add documentation * get extra spaces back * Improve error message * Fix translation files after merge * fix translation files 2 * Add zh translation for import functionality * Sync mailisearch index after import * chore: add dummy uri for jest tests, as MONGO_URI should only be real for E2E tests * docs: fix links * docs: fix conversationsImport section * fix: user role issue for librechat imports * refactor: import conversations from json - organize imports - add additional jsdocs - use multer with diskStorage to avoid loading file into memory outside of job - use filepath instead of loading data string for imports - replace console logs and some logger.info() with logger.debug - only use multer for import route * fix: undefined metadata edge case and replace ChatGtp -> ChatGpt * Refactor importChatGptConvo function to handle undefined metadata edge case and replace ChatGtp with ChatGpt * fix: chatgpt importer * feat: maintain tree relationship for librechat messages * chore: use enum * refactor: saveMessage to use single object arg, replace console logs, add userId to log message * chore: additional comment * chore: multer edge case * feat: first pass, maintain tree relationship * chore: organize * chore: remove log * ci: add heirarchy test for chatgpt * ci: test maintaining of heirarchy for librechat * wip: allow non-text content type messages * refactor: import content part object json string * refactor: more content types to format * chore: consolidate messageText formatting * docs: update on changes, bump data-provider/config versions, update readme * refactor(indexSync): singleton pattern for MeiliSearchClient * refactor: debug log after batch is done * chore: add back indexSync error handling --------- Co-authored-by: jakubmieszczak <jakub.mieszczak@zendesk.com> Co-authored-by: Danny Avila <danny@librechat.ai>
2024-05-02 08:48:26 +02:00
}
};
await traverseMessages(messagesToImport);
} else if (messagesToImport) {
const idMapping = new Map();
📥 feat: Import Conversations from LibreChat, ChatGPT, Chatbot UI (#2355) * Basic implementation of ChatGPT conversation import * remove debug code * Handle citations * Fix updatedAt in import * update default model * Use job scheduler to handle import requests * import job status endpoint * Add wrapper around Agenda * Rate limits for import endpoint * rename import api path * Batch save import to mongo * Improve naming * Add documenting comments * Test for importers * Change button for importing conversations * Frontend changes * Import job status endpoint * Import endpoint response * Add translations to new phrases * Fix conversations refreshing * cleanup unused functions * set timeout for import job status polling * Add documentation * get extra spaces back * Improve error message * Fix translation files after merge * fix translation files 2 * Add zh translation for import functionality * Sync mailisearch index after import * chore: add dummy uri for jest tests, as MONGO_URI should only be real for E2E tests * docs: fix links * docs: fix conversationsImport section * fix: user role issue for librechat imports * refactor: import conversations from json - organize imports - add additional jsdocs - use multer with diskStorage to avoid loading file into memory outside of job - use filepath instead of loading data string for imports - replace console logs and some logger.info() with logger.debug - only use multer for import route * fix: undefined metadata edge case and replace ChatGtp -> ChatGpt * Refactor importChatGptConvo function to handle undefined metadata edge case and replace ChatGtp with ChatGpt * fix: chatgpt importer * feat: maintain tree relationship for librechat messages * chore: use enum * refactor: saveMessage to use single object arg, replace console logs, add userId to log message * chore: additional comment * chore: multer edge case * feat: first pass, maintain tree relationship * chore: organize * chore: remove log * ci: add heirarchy test for chatgpt * ci: test maintaining of heirarchy for librechat * wip: allow non-text content type messages * refactor: import content part object json string * refactor: more content types to format * chore: consolidate messageText formatting * docs: update on changes, bump data-provider/config versions, update readme * refactor(indexSync): singleton pattern for MeiliSearchClient * refactor: debug log after batch is done * chore: add back indexSync error handling --------- Co-authored-by: jakubmieszczak <jakub.mieszczak@zendesk.com> Co-authored-by: Danny Avila <danny@librechat.ai>
2024-05-02 08:48:26 +02:00
for (const message of messagesToImport) {
📥 feat: Import Conversations from LibreChat, ChatGPT, Chatbot UI (#2355) * Basic implementation of ChatGPT conversation import * remove debug code * Handle citations * Fix updatedAt in import * update default model * Use job scheduler to handle import requests * import job status endpoint * Add wrapper around Agenda * Rate limits for import endpoint * rename import api path * Batch save import to mongo * Improve naming * Add documenting comments * Test for importers * Change button for importing conversations * Frontend changes * Import job status endpoint * Import endpoint response * Add translations to new phrases * Fix conversations refreshing * cleanup unused functions * set timeout for import job status polling * Add documentation * get extra spaces back * Improve error message * Fix translation files after merge * fix translation files 2 * Add zh translation for import functionality * Sync mailisearch index after import * chore: add dummy uri for jest tests, as MONGO_URI should only be real for E2E tests * docs: fix links * docs: fix conversationsImport section * fix: user role issue for librechat imports * refactor: import conversations from json - organize imports - add additional jsdocs - use multer with diskStorage to avoid loading file into memory outside of job - use filepath instead of loading data string for imports - replace console logs and some logger.info() with logger.debug - only use multer for import route * fix: undefined metadata edge case and replace ChatGtp -> ChatGpt * Refactor importChatGptConvo function to handle undefined metadata edge case and replace ChatGtp with ChatGpt * fix: chatgpt importer * feat: maintain tree relationship for librechat messages * chore: use enum * refactor: saveMessage to use single object arg, replace console logs, add userId to log message * chore: additional comment * chore: multer edge case * feat: first pass, maintain tree relationship * chore: organize * chore: remove log * ci: add heirarchy test for chatgpt * ci: test maintaining of heirarchy for librechat * wip: allow non-text content type messages * refactor: import content part object json string * refactor: more content types to format * chore: consolidate messageText formatting * docs: update on changes, bump data-provider/config versions, update readme * refactor(indexSync): singleton pattern for MeiliSearchClient * refactor: debug log after batch is done * chore: add back indexSync error handling --------- Co-authored-by: jakubmieszczak <jakub.mieszczak@zendesk.com> Co-authored-by: Danny Avila <danny@librechat.ai>
2024-05-02 08:48:26 +02:00
if (!firstMessageDate) {
firstMessageDate = new Date(message.createdAt);
}
const newMessageId = uuidv4();
idMapping.set(message.messageId, newMessageId);
📥 feat: Import Conversations from LibreChat, ChatGPT, Chatbot UI (#2355) * Basic implementation of ChatGPT conversation import * remove debug code * Handle citations * Fix updatedAt in import * update default model * Use job scheduler to handle import requests * import job status endpoint * Add wrapper around Agenda * Rate limits for import endpoint * rename import api path * Batch save import to mongo * Improve naming * Add documenting comments * Test for importers * Change button for importing conversations * Frontend changes * Import job status endpoint * Import endpoint response * Add translations to new phrases * Fix conversations refreshing * cleanup unused functions * set timeout for import job status polling * Add documentation * get extra spaces back * Improve error message * Fix translation files after merge * fix translation files 2 * Add zh translation for import functionality * Sync mailisearch index after import * chore: add dummy uri for jest tests, as MONGO_URI should only be real for E2E tests * docs: fix links * docs: fix conversationsImport section * fix: user role issue for librechat imports * refactor: import conversations from json - organize imports - add additional jsdocs - use multer with diskStorage to avoid loading file into memory outside of job - use filepath instead of loading data string for imports - replace console logs and some logger.info() with logger.debug - only use multer for import route * fix: undefined metadata edge case and replace ChatGtp -> ChatGpt * Refactor importChatGptConvo function to handle undefined metadata edge case and replace ChatGtp with ChatGpt * fix: chatgpt importer * feat: maintain tree relationship for librechat messages * chore: use enum * refactor: saveMessage to use single object arg, replace console logs, add userId to log message * chore: additional comment * chore: multer edge case * feat: first pass, maintain tree relationship * chore: organize * chore: remove log * ci: add heirarchy test for chatgpt * ci: test maintaining of heirarchy for librechat * wip: allow non-text content type messages * refactor: import content part object json string * refactor: more content types to format * chore: consolidate messageText formatting * docs: update on changes, bump data-provider/config versions, update readme * refactor(indexSync): singleton pattern for MeiliSearchClient * refactor: debug log after batch is done * chore: add back indexSync error handling --------- Co-authored-by: jakubmieszczak <jakub.mieszczak@zendesk.com> Co-authored-by: Danny Avila <danny@librechat.ai>
2024-05-02 08:48:26 +02:00
const clonedMessage = {
...message,
messageId: newMessageId,
parentMessageId:
message.parentMessageId && message.parentMessageId !== Constants.NO_PARENT
? idMapping.get(message.parentMessageId) || Constants.NO_PARENT
: Constants.NO_PARENT,
};
📥 feat: Import Conversations from LibreChat, ChatGPT, Chatbot UI (#2355) * Basic implementation of ChatGPT conversation import * remove debug code * Handle citations * Fix updatedAt in import * update default model * Use job scheduler to handle import requests * import job status endpoint * Add wrapper around Agenda * Rate limits for import endpoint * rename import api path * Batch save import to mongo * Improve naming * Add documenting comments * Test for importers * Change button for importing conversations * Frontend changes * Import job status endpoint * Import endpoint response * Add translations to new phrases * Fix conversations refreshing * cleanup unused functions * set timeout for import job status polling * Add documentation * get extra spaces back * Improve error message * Fix translation files after merge * fix translation files 2 * Add zh translation for import functionality * Sync mailisearch index after import * chore: add dummy uri for jest tests, as MONGO_URI should only be real for E2E tests * docs: fix links * docs: fix conversationsImport section * fix: user role issue for librechat imports * refactor: import conversations from json - organize imports - add additional jsdocs - use multer with diskStorage to avoid loading file into memory outside of job - use filepath instead of loading data string for imports - replace console logs and some logger.info() with logger.debug - only use multer for import route * fix: undefined metadata edge case and replace ChatGtp -> ChatGpt * Refactor importChatGptConvo function to handle undefined metadata edge case and replace ChatGtp with ChatGpt * fix: chatgpt importer * feat: maintain tree relationship for librechat messages * chore: use enum * refactor: saveMessage to use single object arg, replace console logs, add userId to log message * chore: additional comment * chore: multer edge case * feat: first pass, maintain tree relationship * chore: organize * chore: remove log * ci: add heirarchy test for chatgpt * ci: test maintaining of heirarchy for librechat * wip: allow non-text content type messages * refactor: import content part object json string * refactor: more content types to format * chore: consolidate messageText formatting * docs: update on changes, bump data-provider/config versions, update readme * refactor(indexSync): singleton pattern for MeiliSearchClient * refactor: debug log after batch is done * chore: add back indexSync error handling --------- Co-authored-by: jakubmieszczak <jakub.mieszczak@zendesk.com> Co-authored-by: Danny Avila <danny@librechat.ai>
2024-05-02 08:48:26 +02:00
importBatchBuilder.saveMessage(clonedMessage);
}
} else {
throw new Error('Invalid LibreChat file format');
}
📥 feat: Import Conversations from LibreChat, ChatGPT, Chatbot UI (#2355) * Basic implementation of ChatGPT conversation import * remove debug code * Handle citations * Fix updatedAt in import * update default model * Use job scheduler to handle import requests * import job status endpoint * Add wrapper around Agenda * Rate limits for import endpoint * rename import api path * Batch save import to mongo * Improve naming * Add documenting comments * Test for importers * Change button for importing conversations * Frontend changes * Import job status endpoint * Import endpoint response * Add translations to new phrases * Fix conversations refreshing * cleanup unused functions * set timeout for import job status polling * Add documentation * get extra spaces back * Improve error message * Fix translation files after merge * fix translation files 2 * Add zh translation for import functionality * Sync mailisearch index after import * chore: add dummy uri for jest tests, as MONGO_URI should only be real for E2E tests * docs: fix links * docs: fix conversationsImport section * fix: user role issue for librechat imports * refactor: import conversations from json - organize imports - add additional jsdocs - use multer with diskStorage to avoid loading file into memory outside of job - use filepath instead of loading data string for imports - replace console logs and some logger.info() with logger.debug - only use multer for import route * fix: undefined metadata edge case and replace ChatGtp -> ChatGpt * Refactor importChatGptConvo function to handle undefined metadata edge case and replace ChatGtp with ChatGpt * fix: chatgpt importer * feat: maintain tree relationship for librechat messages * chore: use enum * refactor: saveMessage to use single object arg, replace console logs, add userId to log message * chore: additional comment * chore: multer edge case * feat: first pass, maintain tree relationship * chore: organize * chore: remove log * ci: add heirarchy test for chatgpt * ci: test maintaining of heirarchy for librechat * wip: allow non-text content type messages * refactor: import content part object json string * refactor: more content types to format * chore: consolidate messageText formatting * docs: update on changes, bump data-provider/config versions, update readme * refactor(indexSync): singleton pattern for MeiliSearchClient * refactor: debug log after batch is done * chore: add back indexSync error handling --------- Co-authored-by: jakubmieszczak <jakub.mieszczak@zendesk.com> Co-authored-by: Danny Avila <danny@librechat.ai>
2024-05-02 08:48:26 +02:00
importBatchBuilder.finishConversation(jsonData.title, firstMessageDate ?? new Date(), options);
📥 feat: Import Conversations from LibreChat, ChatGPT, Chatbot UI (#2355) * Basic implementation of ChatGPT conversation import * remove debug code * Handle citations * Fix updatedAt in import * update default model * Use job scheduler to handle import requests * import job status endpoint * Add wrapper around Agenda * Rate limits for import endpoint * rename import api path * Batch save import to mongo * Improve naming * Add documenting comments * Test for importers * Change button for importing conversations * Frontend changes * Import job status endpoint * Import endpoint response * Add translations to new phrases * Fix conversations refreshing * cleanup unused functions * set timeout for import job status polling * Add documentation * get extra spaces back * Improve error message * Fix translation files after merge * fix translation files 2 * Add zh translation for import functionality * Sync mailisearch index after import * chore: add dummy uri for jest tests, as MONGO_URI should only be real for E2E tests * docs: fix links * docs: fix conversationsImport section * fix: user role issue for librechat imports * refactor: import conversations from json - organize imports - add additional jsdocs - use multer with diskStorage to avoid loading file into memory outside of job - use filepath instead of loading data string for imports - replace console logs and some logger.info() with logger.debug - only use multer for import route * fix: undefined metadata edge case and replace ChatGtp -> ChatGpt * Refactor importChatGptConvo function to handle undefined metadata edge case and replace ChatGtp with ChatGpt * fix: chatgpt importer * feat: maintain tree relationship for librechat messages * chore: use enum * refactor: saveMessage to use single object arg, replace console logs, add userId to log message * chore: additional comment * chore: multer edge case * feat: first pass, maintain tree relationship * chore: organize * chore: remove log * ci: add heirarchy test for chatgpt * ci: test maintaining of heirarchy for librechat * wip: allow non-text content type messages * refactor: import content part object json string * refactor: more content types to format * chore: consolidate messageText formatting * docs: update on changes, bump data-provider/config versions, update readme * refactor(indexSync): singleton pattern for MeiliSearchClient * refactor: debug log after batch is done * chore: add back indexSync error handling --------- Co-authored-by: jakubmieszczak <jakub.mieszczak@zendesk.com> Co-authored-by: Danny Avila <danny@librechat.ai>
2024-05-02 08:48:26 +02:00
await importBatchBuilder.saveBatch();
logger.debug(`user: ${requestUserId} | Conversation "${jsonData.title}" imported`);
} catch (error) {
logger.error(`user: ${requestUserId} | Error creating conversation from LibreChat file`, error);
}
}
/**
* Imports ChatGPT conversations from provided JSON data.
* Initializes the import process by creating a batch builder and processing each conversation in the data.
*
* @param {ChatGPTConvo[]} jsonData - Array of conversation objects to be imported.
* @param {string} requestUserId - The ID of the user who initiated the import process.
* @param {Function} builderFactory - Factory function to create a new import batch builder instance, defaults to createImportBatchBuilder.
* @returns {Promise<void>} Promise that resolves when all conversations have been imported.
*/
async function importChatGptConvo(
jsonData,
requestUserId,
builderFactory = createImportBatchBuilder,
) {
try {
const importBatchBuilder = builderFactory(requestUserId);
for (const conv of jsonData) {
processConversation(conv, importBatchBuilder, requestUserId);
}
await importBatchBuilder.saveBatch();
} catch (error) {
logger.error(`user: ${requestUserId} | Error creating conversation from imported file`, error);
}
}
/**
* Processes a single conversation, adding messages to the batch builder based on author roles and handling text content.
* It directly manages the addition of messages for different roles and handles citations for assistant messages.
*
* @param {ChatGPTConvo} conv - A single conversation object that contains multiple messages and other details.
🌿 feat: Fork Messages/Conversations (#2617) * typedef for ImportBatchBuilder * feat: first pass, fork conversations * feat: fork - getMessagesUpToTargetLevel * fix: additional tests and fix getAllMessagesUpToParent * chore: arrow function return * refactor: fork 3 options * chore: remove unused genbuttons * chore: remove unused hover buttons code * feat: fork first pass * wip: fork remember setting * style: user icon * chore: move clear chats to data tab * WIP: fork UI options * feat: data-provider fork types/services/vars and use generic MutationOptions * refactor: use single param for fork option, use enum, fix mongo errors, use Date.now(), add records flag for testing, use endpoint from original convo and messages, pass originalConvo to finishConversation * feat: add fork mutation hook and consolidate type imports * refactor: use enum * feat: first pass, fork mutation * chore: add enum for target level fork option * chore: add enum for target level fork option * show toast when checking remember selection * feat: splitAtTarget * feat: split at target option * feat: navigate to new fork, show toasts, set result query data * feat: hover info for all fork options * refactor: add Messages settings tab * fix(Fork): remember text info * ci: test for single message and is target edge case * feat: additional tests for getAllMessagesUpToParent * ci: additional tests and cycle detection for getMessagesUpToTargetLevel * feat: circular dependency checks for getAllMessagesUpToParent * fix: getMessagesUpToTargetLevel circular dep. check * ci: more tests for getMessagesForConversation * style: hover text for checkbox fork items * refactor: add statefulness to conversation import
2024-05-05 11:48:20 -04:00
* @param {ImportBatchBuilder} importBatchBuilder - The batch builder instance used to manage and batch conversation data.
📥 feat: Import Conversations from LibreChat, ChatGPT, Chatbot UI (#2355) * Basic implementation of ChatGPT conversation import * remove debug code * Handle citations * Fix updatedAt in import * update default model * Use job scheduler to handle import requests * import job status endpoint * Add wrapper around Agenda * Rate limits for import endpoint * rename import api path * Batch save import to mongo * Improve naming * Add documenting comments * Test for importers * Change button for importing conversations * Frontend changes * Import job status endpoint * Import endpoint response * Add translations to new phrases * Fix conversations refreshing * cleanup unused functions * set timeout for import job status polling * Add documentation * get extra spaces back * Improve error message * Fix translation files after merge * fix translation files 2 * Add zh translation for import functionality * Sync mailisearch index after import * chore: add dummy uri for jest tests, as MONGO_URI should only be real for E2E tests * docs: fix links * docs: fix conversationsImport section * fix: user role issue for librechat imports * refactor: import conversations from json - organize imports - add additional jsdocs - use multer with diskStorage to avoid loading file into memory outside of job - use filepath instead of loading data string for imports - replace console logs and some logger.info() with logger.debug - only use multer for import route * fix: undefined metadata edge case and replace ChatGtp -> ChatGpt * Refactor importChatGptConvo function to handle undefined metadata edge case and replace ChatGtp with ChatGpt * fix: chatgpt importer * feat: maintain tree relationship for librechat messages * chore: use enum * refactor: saveMessage to use single object arg, replace console logs, add userId to log message * chore: additional comment * chore: multer edge case * feat: first pass, maintain tree relationship * chore: organize * chore: remove log * ci: add heirarchy test for chatgpt * ci: test maintaining of heirarchy for librechat * wip: allow non-text content type messages * refactor: import content part object json string * refactor: more content types to format * chore: consolidate messageText formatting * docs: update on changes, bump data-provider/config versions, update readme * refactor(indexSync): singleton pattern for MeiliSearchClient * refactor: debug log after batch is done * chore: add back indexSync error handling --------- Co-authored-by: jakubmieszczak <jakub.mieszczak@zendesk.com> Co-authored-by: Danny Avila <danny@librechat.ai>
2024-05-02 08:48:26 +02:00
* @param {string} requestUserId - The ID of the user who initiated the import process.
* @returns {void}
*/
function processConversation(conv, importBatchBuilder, requestUserId) {
importBatchBuilder.startConversation(EModelEndpoint.openAI);
// Map all message IDs to new UUIDs
const messageMap = new Map();
for (const [id, mapping] of Object.entries(conv.mapping)) {
if (mapping.message && mapping.message.content.content_type) {
const newMessageId = uuidv4();
messageMap.set(id, newMessageId);
}
}
// Create and save messages using the mapped IDs
const messages = [];
for (const [id, mapping] of Object.entries(conv.mapping)) {
const role = mapping.message?.author?.role;
if (!mapping.message) {
messageMap.delete(id);
continue;
} else if (role === 'system') {
messageMap.delete(id);
continue;
}
const newMessageId = messageMap.get(id);
const parentMessageId =
mapping.parent && messageMap.has(mapping.parent)
? messageMap.get(mapping.parent)
: Constants.NO_PARENT;
const messageText = formatMessageText(mapping.message);
const isCreatedByUser = role === 'user';
let sender = isCreatedByUser ? 'user' : 'GPT-3.5';
const model = mapping.message.metadata.model_slug || openAISettings.model.default;
if (model.includes('gpt-4')) {
📥 feat: Import Conversations from LibreChat, ChatGPT, Chatbot UI (#2355) * Basic implementation of ChatGPT conversation import * remove debug code * Handle citations * Fix updatedAt in import * update default model * Use job scheduler to handle import requests * import job status endpoint * Add wrapper around Agenda * Rate limits for import endpoint * rename import api path * Batch save import to mongo * Improve naming * Add documenting comments * Test for importers * Change button for importing conversations * Frontend changes * Import job status endpoint * Import endpoint response * Add translations to new phrases * Fix conversations refreshing * cleanup unused functions * set timeout for import job status polling * Add documentation * get extra spaces back * Improve error message * Fix translation files after merge * fix translation files 2 * Add zh translation for import functionality * Sync mailisearch index after import * chore: add dummy uri for jest tests, as MONGO_URI should only be real for E2E tests * docs: fix links * docs: fix conversationsImport section * fix: user role issue for librechat imports * refactor: import conversations from json - organize imports - add additional jsdocs - use multer with diskStorage to avoid loading file into memory outside of job - use filepath instead of loading data string for imports - replace console logs and some logger.info() with logger.debug - only use multer for import route * fix: undefined metadata edge case and replace ChatGtp -> ChatGpt * Refactor importChatGptConvo function to handle undefined metadata edge case and replace ChatGtp with ChatGpt * fix: chatgpt importer * feat: maintain tree relationship for librechat messages * chore: use enum * refactor: saveMessage to use single object arg, replace console logs, add userId to log message * chore: additional comment * chore: multer edge case * feat: first pass, maintain tree relationship * chore: organize * chore: remove log * ci: add heirarchy test for chatgpt * ci: test maintaining of heirarchy for librechat * wip: allow non-text content type messages * refactor: import content part object json string * refactor: more content types to format * chore: consolidate messageText formatting * docs: update on changes, bump data-provider/config versions, update readme * refactor(indexSync): singleton pattern for MeiliSearchClient * refactor: debug log after batch is done * chore: add back indexSync error handling --------- Co-authored-by: jakubmieszczak <jakub.mieszczak@zendesk.com> Co-authored-by: Danny Avila <danny@librechat.ai>
2024-05-02 08:48:26 +02:00
sender = 'GPT-4';
}
messages.push({
messageId: newMessageId,
parentMessageId,
text: messageText,
sender,
isCreatedByUser,
model,
user: requestUserId,
endpoint: EModelEndpoint.openAI,
});
}
for (const message of messages) {
importBatchBuilder.saveMessage(message);
}
importBatchBuilder.finishConversation(conv.title, new Date(conv.create_time * 1000));
}
/**
* Processes text content of messages authored by an assistant, inserting citation links as required.
* Applies citation metadata to construct regex patterns and replacements for inserting links into the text.
*
* @param {ChatGPTMessage} messageData - The message data containing metadata about citations.
* @param {string} messageText - The original text of the message which may be altered by inserting citation links.
* @returns {string} - The updated message text after processing for citations.
*/
function processAssistantMessage(messageData, messageText) {
const citations = messageData.metadata.citations ?? [];
for (const citation of citations) {
if (
!citation.metadata ||
!citation.metadata.extra ||
!citation.metadata.extra.cited_message_idx ||
(citation.metadata.type && citation.metadata.type !== 'webpage')
) {
continue;
}
const pattern = new RegExp(
`\\u3010${citation.metadata.extra.cited_message_idx}\\u2020.+?\\u3011`,
'g',
);
const replacement = ` ([${citation.metadata.title}](${citation.metadata.url}))`;
messageText = messageText.replace(pattern, replacement);
}
return messageText;
}
/**
* Formats the text content of a message based on its content type and author role.
* @param {ChatGPTMessage} messageData - The message data.
* @returns {string} - The updated message text after processing.
*/
function formatMessageText(messageData) {
const isText = messageData.content.content_type === 'text';
let messageText = '';
if (isText && messageData.content.parts) {
messageText = messageData.content.parts.join(' ');
} else if (messageData.content.content_type === 'code') {
messageText = `\`\`\`${messageData.content.language}\n${messageData.content.text}\n\`\`\``;
} else if (messageData.content.content_type === 'execution_output') {
messageText = `Execution Output:\n> ${messageData.content.text}`;
} else if (messageData.content.parts) {
for (const part of messageData.content.parts) {
if (typeof part === 'string') {
messageText += part + ' ';
} else if (typeof part === 'object') {
messageText = `\`\`\`json\n${JSON.stringify(part, null, 2)}\n\`\`\`\n`;
}
}
messageText = messageText.trim();
} else {
messageText = `\`\`\`json\n${JSON.stringify(messageData.content, null, 2)}\n\`\`\``;
}
if (isText && messageData.author.role !== 'user') {
messageText = processAssistantMessage(messageData, messageText);
}
return messageText;
}
module.exports = { getImporter };