mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 08:50:15 +01:00
📜 fix: Better OpenAI Assistants Annotation Processing (#3565)
* fix: correctly handle openai assistants annotations
* fix: Handle adjacent identical citations only for our specific format
* fix: correctly handle adjacent identical citations
* refactor: make regex handling more robust
* fix: skip annotation condition, make regex handling more robust
* refactor: Handle FILE_PATH and FILE_CITATION annotation types in processMessages.spec.js
* ci: unit tests for real file path type annotations
This commit is contained in:
parent
c2a79aee1b
commit
270c6d2350
2 changed files with 1097 additions and 123 deletions
|
|
@ -11,7 +11,6 @@ const { recordMessage, getMessages } = require('~/models/Message');
|
||||||
const { saveConvo } = require('~/models/Conversation');
|
const { saveConvo } = require('~/models/Conversation');
|
||||||
const spendTokens = require('~/models/spendTokens');
|
const spendTokens = require('~/models/spendTokens');
|
||||||
const { countTokens } = require('~/server/utils');
|
const { countTokens } = require('~/server/utils');
|
||||||
const { logger } = require('~/config');
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initializes a new thread or adds messages to an existing thread.
|
* Initializes a new thread or adds messages to an existing thread.
|
||||||
|
|
@ -516,80 +515,34 @@ const recordUsage = async ({
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
const uniqueCitationStart = '^====||===';
|
||||||
* Creates a replaceAnnotation function with internal state for tracking the index offset.
|
const uniqueCitationEnd = '==|||||^';
|
||||||
*
|
|
||||||
* @returns {function} The replaceAnnotation function with closure for index offset.
|
/** Helper function to escape special characters in regex
|
||||||
|
* @param {string} string - The string to escape.
|
||||||
|
* @returns {string} The escaped string.
|
||||||
*/
|
*/
|
||||||
function createReplaceAnnotation() {
|
function escapeRegExp(string) {
|
||||||
let indexOffset = 0;
|
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
||||||
|
|
||||||
/**
|
|
||||||
* Safely replaces the annotated text within the specified range denoted by start_index and end_index,
|
|
||||||
* after verifying that the text within that range matches the given annotation text.
|
|
||||||
* Proceeds with the replacement even if a mismatch is found, but logs a warning.
|
|
||||||
*
|
|
||||||
* @param {object} params The original text content.
|
|
||||||
* @param {string} params.currentText The current text content, with/without replacements.
|
|
||||||
* @param {number} params.start_index The starting index where replacement should begin.
|
|
||||||
* @param {number} params.end_index The ending index where replacement should end.
|
|
||||||
* @param {string} params.expectedText The text expected to be found in the specified range.
|
|
||||||
* @param {string} params.replacementText The text to insert in place of the existing content.
|
|
||||||
* @returns {string} The text with the replacement applied, regardless of text match.
|
|
||||||
*/
|
|
||||||
function replaceAnnotation({
|
|
||||||
currentText,
|
|
||||||
start_index,
|
|
||||||
end_index,
|
|
||||||
expectedText,
|
|
||||||
replacementText,
|
|
||||||
}) {
|
|
||||||
const adjustedStartIndex = start_index + indexOffset;
|
|
||||||
const adjustedEndIndex = end_index + indexOffset;
|
|
||||||
|
|
||||||
if (
|
|
||||||
adjustedStartIndex < 0 ||
|
|
||||||
adjustedEndIndex > currentText.length ||
|
|
||||||
adjustedStartIndex > adjustedEndIndex
|
|
||||||
) {
|
|
||||||
logger.warn(`Invalid range specified for annotation replacement.
|
|
||||||
Attempting replacement with \`replace\` method instead...
|
|
||||||
length: ${currentText.length}
|
|
||||||
start_index: ${adjustedStartIndex}
|
|
||||||
end_index: ${adjustedEndIndex}`);
|
|
||||||
return currentText.replace(expectedText, replacementText);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (currentText.substring(adjustedStartIndex, adjustedEndIndex) !== expectedText) {
|
|
||||||
return currentText.replace(expectedText, replacementText);
|
|
||||||
}
|
|
||||||
|
|
||||||
indexOffset += replacementText.length - (adjustedEndIndex - adjustedStartIndex);
|
|
||||||
return (
|
|
||||||
currentText.slice(0, adjustedStartIndex) +
|
|
||||||
replacementText +
|
|
||||||
currentText.slice(adjustedEndIndex)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
return replaceAnnotation;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sorts, processes, and flattens messages to a single string.
|
* Sorts, processes, and flattens messages to a single string.
|
||||||
*
|
*
|
||||||
* @param {object} params - The OpenAI client instance.
|
* @param {object} params - The parameters for processing messages.
|
||||||
* @param {OpenAIClient} params.openai - The OpenAI client instance.
|
* @param {OpenAIClient} params.openai - The OpenAI client instance.
|
||||||
* @param {RunClient} params.client - The LibreChat client that manages the run: either refers to `OpenAI` or `StreamRunManager`.
|
* @param {RunClient} params.client - The LibreChat client that manages the run: either refers to `OpenAI` or `StreamRunManager`.
|
||||||
* @param {ThreadMessage[]} params.messages - An array of messages.
|
* @param {ThreadMessage[]} params.messages - An array of messages.
|
||||||
* @returns {Promise<{messages: ThreadMessage[], text: string}>} The sorted messages and the flattened text.
|
* @returns {Promise<{messages: ThreadMessage[], text: string, edited: boolean}>} The sorted messages, the flattened text, and whether it was edited.
|
||||||
*/
|
*/
|
||||||
async function processMessages({ openai, client, messages = [] }) {
|
async function processMessages({ openai, client, messages = [] }) {
|
||||||
const sorted = messages.sort((a, b) => a.created_at - b.created_at);
|
const sorted = messages.sort((a, b) => a.created_at - b.created_at);
|
||||||
|
|
||||||
let text = '';
|
let text = '';
|
||||||
let edited = false;
|
let edited = false;
|
||||||
const sources = [];
|
const sources = new Map();
|
||||||
|
const fileRetrievalPromises = [];
|
||||||
|
|
||||||
for (const message of sorted) {
|
for (const message of sorted) {
|
||||||
message.files = [];
|
message.files = [];
|
||||||
for (const content of message.content) {
|
for (const content of message.content) {
|
||||||
|
|
@ -598,15 +551,21 @@ async function processMessages({ openai, client, messages = [] }) {
|
||||||
const currentFileId = contentType?.file_id;
|
const currentFileId = contentType?.file_id;
|
||||||
|
|
||||||
if (type === ContentTypes.IMAGE_FILE && !client.processedFileIds.has(currentFileId)) {
|
if (type === ContentTypes.IMAGE_FILE && !client.processedFileIds.has(currentFileId)) {
|
||||||
const file = await retrieveAndProcessFile({
|
fileRetrievalPromises.push(
|
||||||
|
retrieveAndProcessFile({
|
||||||
openai,
|
openai,
|
||||||
client,
|
client,
|
||||||
file_id: currentFileId,
|
file_id: currentFileId,
|
||||||
basename: `${currentFileId}.png`,
|
basename: `${currentFileId}.png`,
|
||||||
});
|
})
|
||||||
|
.then((file) => {
|
||||||
client.processedFileIds.add(currentFileId);
|
client.processedFileIds.add(currentFileId);
|
||||||
message.files.push(file);
|
message.files.push(file);
|
||||||
|
})
|
||||||
|
.catch((error) => {
|
||||||
|
console.error(`Failed to retrieve file: ${error.message}`);
|
||||||
|
}),
|
||||||
|
);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -615,40 +574,23 @@ async function processMessages({ openai, client, messages = [] }) {
|
||||||
/** @type {{ annotations: Annotation[] }} */
|
/** @type {{ annotations: Annotation[] }} */
|
||||||
const { annotations } = contentType ?? {};
|
const { annotations } = contentType ?? {};
|
||||||
|
|
||||||
// Process annotations if they exist
|
|
||||||
if (!annotations?.length) {
|
if (!annotations?.length) {
|
||||||
text += currentText + ' ';
|
text += currentText;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const originalText = currentText;
|
const replacements = [];
|
||||||
text += originalText;
|
const annotationPromises = annotations.map(async (annotation) => {
|
||||||
|
|
||||||
const replaceAnnotation = createReplaceAnnotation();
|
|
||||||
|
|
||||||
logger.debug('[processMessages] Processing annotations:', annotations);
|
|
||||||
for (const annotation of annotations) {
|
|
||||||
let file;
|
|
||||||
const type = annotation.type;
|
const type = annotation.type;
|
||||||
const annotationType = annotation[type];
|
const annotationType = annotation[type];
|
||||||
const file_id = annotationType?.file_id;
|
const file_id = annotationType?.file_id;
|
||||||
const alreadyProcessed = client.processedFileIds.has(file_id);
|
const alreadyProcessed = client.processedFileIds.has(file_id);
|
||||||
|
|
||||||
const replaceCurrentAnnotation = (replacementText = '') => {
|
let file;
|
||||||
const { start_index, end_index, text: expectedText } = annotation;
|
let replacementText = '';
|
||||||
currentText = replaceAnnotation({
|
|
||||||
originalText,
|
|
||||||
currentText,
|
|
||||||
start_index,
|
|
||||||
end_index,
|
|
||||||
expectedText,
|
|
||||||
replacementText,
|
|
||||||
});
|
|
||||||
edited = true;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
try {
|
||||||
if (alreadyProcessed) {
|
if (alreadyProcessed) {
|
||||||
const { file_id } = annotationType || {};
|
|
||||||
file = await retrieveAndProcessFile({ openai, client, file_id, unknownType: true });
|
file = await retrieveAndProcessFile({ openai, client, file_id, unknownType: true });
|
||||||
} else if (type === AnnotationTypes.FILE_PATH) {
|
} else if (type === AnnotationTypes.FILE_PATH) {
|
||||||
const basename = path.basename(annotation.text);
|
const basename = path.basename(annotation.text);
|
||||||
|
|
@ -658,37 +600,86 @@ async function processMessages({ openai, client, messages = [] }) {
|
||||||
file_id,
|
file_id,
|
||||||
basename,
|
basename,
|
||||||
});
|
});
|
||||||
replaceCurrentAnnotation(file.filepath);
|
replacementText = file.filepath;
|
||||||
} else if (type === AnnotationTypes.FILE_CITATION) {
|
} else if (type === AnnotationTypes.FILE_CITATION && file_id) {
|
||||||
file = await retrieveAndProcessFile({
|
file = await retrieveAndProcessFile({
|
||||||
openai,
|
openai,
|
||||||
client,
|
client,
|
||||||
file_id,
|
file_id,
|
||||||
unknownType: true,
|
unknownType: true,
|
||||||
});
|
});
|
||||||
sources.push(file.filename);
|
if (file && file.filename) {
|
||||||
replaceCurrentAnnotation(`^${sources.length}^`);
|
if (!sources.has(file.filename)) {
|
||||||
}
|
sources.set(file.filename, sources.size + 1);
|
||||||
|
}
|
||||||
text = currentText;
|
replacementText = `${uniqueCitationStart}${sources.get(
|
||||||
|
file.filename,
|
||||||
if (!file) {
|
)}${uniqueCitationEnd}`;
|
||||||
continue;
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (file && replacementText) {
|
||||||
|
replacements.push({
|
||||||
|
start: annotation.start_index,
|
||||||
|
end: annotation.end_index,
|
||||||
|
text: replacementText,
|
||||||
|
});
|
||||||
|
edited = true;
|
||||||
|
if (!alreadyProcessed) {
|
||||||
client.processedFileIds.add(file_id);
|
client.processedFileIds.add(file_id);
|
||||||
message.files.push(file);
|
message.files.push(file);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} catch (error) {
|
||||||
|
console.error(`Failed to process annotation: ${error.message}`);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
await Promise.all(annotationPromises);
|
||||||
|
|
||||||
|
// Apply replacements in reverse order
|
||||||
|
replacements.sort((a, b) => b.start - a.start);
|
||||||
|
for (const { start, end, text: replacementText } of replacements) {
|
||||||
|
currentText = currentText.slice(0, start) + replacementText + currentText.slice(end);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sources.length) {
|
text += currentText;
|
||||||
text += '\n\n';
|
|
||||||
for (let i = 0; i < sources.length; i++) {
|
|
||||||
text += `^${i + 1}.^ ${sources[i]}${i === sources.length - 1 ? '' : '\n'}`;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
await Promise.all(fileRetrievalPromises);
|
||||||
|
|
||||||
|
// Handle adjacent identical citations with the unique format
|
||||||
|
const adjacentCitationRegex = new RegExp(
|
||||||
|
`${escapeRegExp(uniqueCitationStart)}(\\d+)${escapeRegExp(
|
||||||
|
uniqueCitationEnd,
|
||||||
|
)}(\\s*)${escapeRegExp(uniqueCitationStart)}(\\d+)${escapeRegExp(uniqueCitationEnd)}`,
|
||||||
|
'g',
|
||||||
|
);
|
||||||
|
text = text.replace(adjacentCitationRegex, (match, num1, space, num2) => {
|
||||||
|
return num1 === num2
|
||||||
|
? `${uniqueCitationStart}${num1}${uniqueCitationEnd}`
|
||||||
|
: `${uniqueCitationStart}${num1}${uniqueCitationEnd}${space}${uniqueCitationStart}${num2}${uniqueCitationEnd}`;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Remove any remaining adjacent identical citations
|
||||||
|
const remainingAdjacentRegex = new RegExp(
|
||||||
|
`(${escapeRegExp(uniqueCitationStart)}(\\d+)${escapeRegExp(uniqueCitationEnd)})\\s*\\1+`,
|
||||||
|
'g',
|
||||||
|
);
|
||||||
|
text = text.replace(remainingAdjacentRegex, '$1');
|
||||||
|
|
||||||
|
// Replace the unique citation format with the final format
|
||||||
|
text = text.replace(new RegExp(escapeRegExp(uniqueCitationStart), 'g'), '^');
|
||||||
|
text = text.replace(new RegExp(escapeRegExp(uniqueCitationEnd), 'g'), '^');
|
||||||
|
|
||||||
|
if (sources.size) {
|
||||||
|
text += '\n\n';
|
||||||
|
Array.from(sources.entries()).forEach(([source, index], arrayIndex) => {
|
||||||
|
text += `^${index}.^ ${source}${arrayIndex === sources.size - 1 ? '' : '\n'}`;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
return { messages: sorted, text, edited };
|
return { messages: sorted, text, edited };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
983
api/server/services/Threads/processMessages.spec.js
Normal file
983
api/server/services/Threads/processMessages.spec.js
Normal file
|
|
@ -0,0 +1,983 @@
|
||||||
|
const { retrieveAndProcessFile } = require('~/server/services/Files/process');
|
||||||
|
const { processMessages } = require('./manage');
|
||||||
|
|
||||||
|
jest.mock('~/server/services/Files/process', () => ({
|
||||||
|
retrieveAndProcessFile: jest.fn(),
|
||||||
|
}));
|
||||||
|
|
||||||
|
describe('processMessages', () => {
|
||||||
|
let openai, client;
|
||||||
|
|
||||||
|
beforeEach(() => {
|
||||||
|
openai = {};
|
||||||
|
client = {
|
||||||
|
processedFileIds: new Set(),
|
||||||
|
};
|
||||||
|
jest.clearAllMocks();
|
||||||
|
retrieveAndProcessFile.mockReset();
|
||||||
|
});
|
||||||
|
|
||||||
|
test('handles normal case with single source', async () => {
|
||||||
|
const messages = [
|
||||||
|
{
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'text',
|
||||||
|
text: {
|
||||||
|
value: 'This is a test ^1^ and another^1^',
|
||||||
|
annotations: [
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
start_index: 15,
|
||||||
|
end_index: 18,
|
||||||
|
file_citation: { file_id: 'file1' },
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
start_index: 30,
|
||||||
|
end_index: 33,
|
||||||
|
file_citation: { file_id: 'file1' },
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
created_at: 1,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
retrieveAndProcessFile.mockResolvedValue({ filename: 'test.txt' });
|
||||||
|
|
||||||
|
const result = await processMessages({ openai, client, messages });
|
||||||
|
|
||||||
|
expect(result.text).toBe('This is a test ^1^ and another^1^\n\n^1.^ test.txt');
|
||||||
|
expect(result.edited).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('handles multiple different sources', async () => {
|
||||||
|
const messages = [
|
||||||
|
{
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'text',
|
||||||
|
text: {
|
||||||
|
value: 'This is a test ^1^ and another^2^',
|
||||||
|
annotations: [
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
start_index: 15,
|
||||||
|
end_index: 18,
|
||||||
|
file_citation: { file_id: 'file1' },
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
start_index: 30,
|
||||||
|
end_index: 33,
|
||||||
|
file_citation: { file_id: 'file2' },
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
created_at: 1,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
retrieveAndProcessFile
|
||||||
|
.mockResolvedValueOnce({ filename: 'test1.txt' })
|
||||||
|
.mockResolvedValueOnce({ filename: 'test2.txt' });
|
||||||
|
|
||||||
|
const result = await processMessages({ openai, client, messages });
|
||||||
|
|
||||||
|
expect(result.text).toBe('This is a test ^1^ and another^2^\n\n^1.^ test1.txt\n^2.^ test2.txt');
|
||||||
|
expect(result.edited).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('handles file retrieval failure', async () => {
|
||||||
|
const messages = [
|
||||||
|
{
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'text',
|
||||||
|
text: {
|
||||||
|
value: 'This is a test ^1^',
|
||||||
|
annotations: [
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
start_index: 15,
|
||||||
|
end_index: 18,
|
||||||
|
file_citation: { file_id: 'file1' },
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
created_at: 1,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
retrieveAndProcessFile.mockRejectedValue(new Error('File not found'));
|
||||||
|
|
||||||
|
const result = await processMessages({ openai, client, messages });
|
||||||
|
|
||||||
|
expect(result.text).toBe('This is a test ^1^');
|
||||||
|
expect(result.edited).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('handles citations without file ids', async () => {
|
||||||
|
const messages = [
|
||||||
|
{
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'text',
|
||||||
|
text: {
|
||||||
|
value: 'This is a test ^1^',
|
||||||
|
annotations: [{ type: 'file_citation', start_index: 15, end_index: 18 }],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
created_at: 1,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
const result = await processMessages({ openai, client, messages });
|
||||||
|
|
||||||
|
expect(result.text).toBe('This is a test ^1^');
|
||||||
|
expect(result.edited).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('handles mixed valid and invalid citations', async () => {
|
||||||
|
const messages = [
|
||||||
|
{
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'text',
|
||||||
|
text: {
|
||||||
|
value: 'This is a test ^1^ and ^2^ and ^3^',
|
||||||
|
annotations: [
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
start_index: 15,
|
||||||
|
end_index: 18,
|
||||||
|
file_citation: { file_id: 'file1' },
|
||||||
|
},
|
||||||
|
{ type: 'file_citation', start_index: 23, end_index: 26 },
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
start_index: 31,
|
||||||
|
end_index: 34,
|
||||||
|
file_citation: { file_id: 'file3' },
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
created_at: 1,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
retrieveAndProcessFile
|
||||||
|
.mockResolvedValueOnce({ filename: 'test1.txt' })
|
||||||
|
.mockResolvedValueOnce({ filename: 'test3.txt' });
|
||||||
|
|
||||||
|
const result = await processMessages({ openai, client, messages });
|
||||||
|
|
||||||
|
expect(result.text).toBe(
|
||||||
|
'This is a test ^1^ and ^2^ and ^2^\n\n^1.^ test1.txt\n^2.^ test3.txt',
|
||||||
|
);
|
||||||
|
expect(result.edited).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('handles adjacent identical citations', async () => {
|
||||||
|
const messages = [
|
||||||
|
{
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'text',
|
||||||
|
text: {
|
||||||
|
value: 'This is a test ^1^^1^ and ^1^ ^1^',
|
||||||
|
annotations: [
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
start_index: 15,
|
||||||
|
end_index: 18,
|
||||||
|
file_citation: { file_id: 'file1' },
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
start_index: 18,
|
||||||
|
end_index: 21,
|
||||||
|
file_citation: { file_id: 'file1' },
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
start_index: 26,
|
||||||
|
end_index: 29,
|
||||||
|
file_citation: { file_id: 'file1' },
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
start_index: 30,
|
||||||
|
end_index: 33,
|
||||||
|
file_citation: { file_id: 'file1' },
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
created_at: 1,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
retrieveAndProcessFile.mockResolvedValue({ filename: 'test.txt' });
|
||||||
|
|
||||||
|
const result = await processMessages({ openai, client, messages });
|
||||||
|
|
||||||
|
expect(result.text).toBe('This is a test ^1^ and ^1^\n\n^1.^ test.txt');
|
||||||
|
expect(result.edited).toBe(true);
|
||||||
|
});
|
||||||
|
test('handles real data with multiple adjacent citations', async () => {
|
||||||
|
const messages = [
|
||||||
|
{
|
||||||
|
id: 'msg_XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
object: 'thread.message',
|
||||||
|
created_at: 1722980324,
|
||||||
|
assistant_id: 'asst_XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
thread_id: 'thread_XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
run_id: 'run_XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
status: 'completed',
|
||||||
|
incomplete_details: null,
|
||||||
|
incomplete_at: null,
|
||||||
|
completed_at: 1722980331,
|
||||||
|
role: 'assistant',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'text',
|
||||||
|
text: {
|
||||||
|
value:
|
||||||
|
'The text you have uploaded is from the book "Harry Potter and the Philosopher\'s Stone" by J.K. Rowling. It follows the story of a young boy named Harry Potter who discovers that he is a wizard on his eleventh birthday. Here are some key points of the narrative:\n\n1. **Discovery and Invitation to Hogwarts**: Harry learns that he is a wizard and receives an invitation to attend Hogwarts School of Witchcraft and Wizardry【11:2†source】【11:4†source】.\n\n2. **Shopping for Supplies**: Hagrid takes Harry to Diagon Alley to buy his school supplies, including his wand from Ollivander\'s【11:9†source】【11:14†source】.\n\n3. **Introduction to Hogwarts**: Harry is introduced to Hogwarts, the magical school where he will learn about magic and discover more about his own background【11:12†source】【11:18†source】.\n\n4. **Meeting Friends and Enemies**: At Hogwarts, Harry makes friends like Ron Weasley and Hermione Granger, and enemies like Draco Malfoy【11:16†source】.\n\n5. **Uncovering the Mystery**: Harry, along with Ron and Hermione, uncovers the mystery of the Philosopher\'s Stone and its connection to the dark wizard Voldemort【11:1†source】【11:10†source】【11:7†source】.\n\nThese points highlight Harry\'s initial experiences in the magical world and set the stage for his adventures at Hogwarts.',
|
||||||
|
annotations: [
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
text: '【11:2†source】',
|
||||||
|
start_index: 420,
|
||||||
|
end_index: 433,
|
||||||
|
file_citation: {
|
||||||
|
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
text: '【11:4†source】',
|
||||||
|
start_index: 433,
|
||||||
|
end_index: 446,
|
||||||
|
file_citation: {
|
||||||
|
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
text: '【11:9†source】',
|
||||||
|
start_index: 578,
|
||||||
|
end_index: 591,
|
||||||
|
file_citation: {
|
||||||
|
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
text: '【11:14†source】',
|
||||||
|
start_index: 591,
|
||||||
|
end_index: 605,
|
||||||
|
file_citation: {
|
||||||
|
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
text: '【11:12†source】',
|
||||||
|
start_index: 767,
|
||||||
|
end_index: 781,
|
||||||
|
file_citation: {
|
||||||
|
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
text: '【11:18†source】',
|
||||||
|
start_index: 781,
|
||||||
|
end_index: 795,
|
||||||
|
file_citation: {
|
||||||
|
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
text: '【11:16†source】',
|
||||||
|
start_index: 935,
|
||||||
|
end_index: 949,
|
||||||
|
file_citation: {
|
||||||
|
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
text: '【11:1†source】',
|
||||||
|
start_index: 1114,
|
||||||
|
end_index: 1127,
|
||||||
|
file_citation: {
|
||||||
|
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
text: '【11:10†source】',
|
||||||
|
start_index: 1127,
|
||||||
|
end_index: 1141,
|
||||||
|
file_citation: {
|
||||||
|
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
text: '【11:7†source】',
|
||||||
|
start_index: 1141,
|
||||||
|
end_index: 1154,
|
||||||
|
file_citation: {
|
||||||
|
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
attachments: [],
|
||||||
|
metadata: {},
|
||||||
|
files: [
|
||||||
|
{
|
||||||
|
object: 'file',
|
||||||
|
id: 'file-XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
purpose: 'assistants',
|
||||||
|
filename: 'hp1.txt',
|
||||||
|
bytes: 439742,
|
||||||
|
created_at: 1722962139,
|
||||||
|
status: 'processed',
|
||||||
|
status_details: null,
|
||||||
|
type: 'text/plain',
|
||||||
|
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
filepath:
|
||||||
|
'https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-XXXXXXXXXXXXXXXXXXXX/hp1.txt',
|
||||||
|
usage: 1,
|
||||||
|
user: 'XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
context: 'assistants',
|
||||||
|
source: 'openai',
|
||||||
|
model: 'gpt-4o',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
retrieveAndProcessFile.mockResolvedValue({ filename: 'hp1.txt' });
|
||||||
|
|
||||||
|
const result = await processMessages({
|
||||||
|
openai: {},
|
||||||
|
client: { processedFileIds: new Set() },
|
||||||
|
messages,
|
||||||
|
});
|
||||||
|
|
||||||
|
const expectedText = `The text you have uploaded is from the book "Harry Potter and the Philosopher's Stone" by J.K. Rowling. It follows the story of a young boy named Harry Potter who discovers that he is a wizard on his eleventh birthday. Here are some key points of the narrative:
|
||||||
|
|
||||||
|
1. **Discovery and Invitation to Hogwarts**: Harry learns that he is a wizard and receives an invitation to attend Hogwarts School of Witchcraft and Wizardry^1^.
|
||||||
|
|
||||||
|
2. **Shopping for Supplies**: Hagrid takes Harry to Diagon Alley to buy his school supplies, including his wand from Ollivander's^1^.
|
||||||
|
|
||||||
|
3. **Introduction to Hogwarts**: Harry is introduced to Hogwarts, the magical school where he will learn about magic and discover more about his own background^1^.
|
||||||
|
|
||||||
|
4. **Meeting Friends and Enemies**: At Hogwarts, Harry makes friends like Ron Weasley and Hermione Granger, and enemies like Draco Malfoy^1^.
|
||||||
|
|
||||||
|
5. **Uncovering the Mystery**: Harry, along with Ron and Hermione, uncovers the mystery of the Philosopher's Stone and its connection to the dark wizard Voldemort^1^.
|
||||||
|
|
||||||
|
These points highlight Harry's initial experiences in the magical world and set the stage for his adventures at Hogwarts.
|
||||||
|
|
||||||
|
^1.^ hp1.txt`;
|
||||||
|
|
||||||
|
expect(result.text).toBe(expectedText);
|
||||||
|
expect(result.edited).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('handles real data with multiple adjacent citations with multiple sources', async () => {
|
||||||
|
const messages = [
|
||||||
|
{
|
||||||
|
id: 'msg_XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
object: 'thread.message',
|
||||||
|
created_at: 1722980324,
|
||||||
|
assistant_id: 'asst_XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
thread_id: 'thread_XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
run_id: 'run_XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
status: 'completed',
|
||||||
|
incomplete_details: null,
|
||||||
|
incomplete_at: null,
|
||||||
|
completed_at: 1722980331,
|
||||||
|
role: 'assistant',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'text',
|
||||||
|
text: {
|
||||||
|
value:
|
||||||
|
'The text you have uploaded is from the book "Harry Potter and the Philosopher\'s Stone" by J.K. Rowling. It follows the story of a young boy named Harry Potter who discovers that he is a wizard on his eleventh birthday. Here are some key points of the narrative:\n\n1. **Discovery and Invitation to Hogwarts**: Harry learns that he is a wizard and receives an invitation to attend Hogwarts School of Witchcraft and Wizardry【11:2†source】【11:4†source】.\n\n2. **Shopping for Supplies**: Hagrid takes Harry to Diagon Alley to buy his school supplies, including his wand from Ollivander\'s【11:9†source】【11:14†source】.\n\n3. **Introduction to Hogwarts**: Harry is introduced to Hogwarts, the magical school where he will learn about magic and discover more about his own background【11:12†source】【11:18†source】.\n\n4. **Meeting Friends and Enemies**: At Hogwarts, Harry makes friends like Ron Weasley and Hermione Granger, and enemies like Draco Malfoy【11:16†source】.\n\n5. **Uncovering the Mystery**: Harry, along with Ron and Hermione, uncovers the mystery of the Philosopher\'s Stone and its connection to the dark wizard Voldemort【11:1†source】【11:10†source】【11:7†source】.\n\nThese points highlight Harry\'s initial experiences in the magical world and set the stage for his adventures at Hogwarts.',
|
||||||
|
annotations: [
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
text: '【11:2†source】',
|
||||||
|
start_index: 420,
|
||||||
|
end_index: 433,
|
||||||
|
file_citation: {
|
||||||
|
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
text: '【11:4†source】',
|
||||||
|
start_index: 433,
|
||||||
|
end_index: 446,
|
||||||
|
file_citation: {
|
||||||
|
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
text: '【11:9†source】',
|
||||||
|
start_index: 578,
|
||||||
|
end_index: 591,
|
||||||
|
file_citation: {
|
||||||
|
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
text: '【11:14†source】',
|
||||||
|
start_index: 591,
|
||||||
|
end_index: 605,
|
||||||
|
file_citation: {
|
||||||
|
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
text: '【11:12†source】',
|
||||||
|
start_index: 767,
|
||||||
|
end_index: 781,
|
||||||
|
file_citation: {
|
||||||
|
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
text: '【11:18†source】',
|
||||||
|
start_index: 781,
|
||||||
|
end_index: 795,
|
||||||
|
file_citation: {
|
||||||
|
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
text: '【11:16†source】',
|
||||||
|
start_index: 935,
|
||||||
|
end_index: 949,
|
||||||
|
file_citation: {
|
||||||
|
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
text: '【11:1†source】',
|
||||||
|
start_index: 1114,
|
||||||
|
end_index: 1127,
|
||||||
|
file_citation: {
|
||||||
|
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
text: '【11:10†source】',
|
||||||
|
start_index: 1127,
|
||||||
|
end_index: 1141,
|
||||||
|
file_citation: {
|
||||||
|
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'file_citation',
|
||||||
|
text: '【11:7†source】',
|
||||||
|
start_index: 1141,
|
||||||
|
end_index: 1154,
|
||||||
|
file_citation: {
|
||||||
|
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
attachments: [],
|
||||||
|
metadata: {},
|
||||||
|
files: [
|
||||||
|
{
|
||||||
|
object: 'file',
|
||||||
|
id: 'file-XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
purpose: 'assistants',
|
||||||
|
filename: 'hp1.txt',
|
||||||
|
bytes: 439742,
|
||||||
|
created_at: 1722962139,
|
||||||
|
status: 'processed',
|
||||||
|
status_details: null,
|
||||||
|
type: 'text/plain',
|
||||||
|
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
filepath:
|
||||||
|
'https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-XXXXXXXXXXXXXXXXXXXX/hp1.txt',
|
||||||
|
usage: 1,
|
||||||
|
user: 'XXXXXXXXXXXXXXXXXXXX',
|
||||||
|
context: 'assistants',
|
||||||
|
source: 'openai',
|
||||||
|
model: 'gpt-4o',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
retrieveAndProcessFile.mockResolvedValue({ filename: 'hp1.txt' });
|
||||||
|
|
||||||
|
const result = await processMessages({
|
||||||
|
openai: {},
|
||||||
|
client: { processedFileIds: new Set() },
|
||||||
|
messages,
|
||||||
|
});
|
||||||
|
|
||||||
|
const expectedText = `The text you have uploaded is from the book "Harry Potter and the Philosopher's Stone" by J.K. Rowling. It follows the story of a young boy named Harry Potter who discovers that he is a wizard on his eleventh birthday. Here are some key points of the narrative:
|
||||||
|
|
||||||
|
1. **Discovery and Invitation to Hogwarts**: Harry learns that he is a wizard and receives an invitation to attend Hogwarts School of Witchcraft and Wizardry^1^.
|
||||||
|
|
||||||
|
2. **Shopping for Supplies**: Hagrid takes Harry to Diagon Alley to buy his school supplies, including his wand from Ollivander's^1^.
|
||||||
|
|
||||||
|
3. **Introduction to Hogwarts**: Harry is introduced to Hogwarts, the magical school where he will learn about magic and discover more about his own background^1^.
|
||||||
|
|
||||||
|
4. **Meeting Friends and Enemies**: At Hogwarts, Harry makes friends like Ron Weasley and Hermione Granger, and enemies like Draco Malfoy^1^.
|
||||||
|
|
||||||
|
5. **Uncovering the Mystery**: Harry, along with Ron and Hermione, uncovers the mystery of the Philosopher's Stone and its connection to the dark wizard Voldemort^1^.
|
||||||
|
|
||||||
|
These points highlight Harry's initial experiences in the magical world and set the stage for his adventures at Hogwarts.
|
||||||
|
|
||||||
|
^1.^ hp1.txt`;
|
||||||
|
|
||||||
|
expect(result.text).toBe(expectedText);
|
||||||
|
expect(result.edited).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('handles edge case with pre-existing citation-like text', async () => {
  // The message body already contains a literal "^1^" that is NOT an annotation.
  // Only the annotated span (indices 79-92) should be rewritten; the expected text
  // below keeps the earlier "^1^" exactly where it was.
  const realCitation = {
    type: 'file_citation',
    text: '【11:2†source】',
    start_index: 79,
    end_index: 92,
    file_citation: {
      file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
    },
  };

  const textPart = {
    type: 'text',
    text: {
      value:
        'This is a test ^1^ with pre-existing citation-like text. Here\'s a real citation【11:2†source】.',
      annotations: [realCitation],
    },
  };

  const messages = [{ content: [textPart], created_at: 1 }];

  retrieveAndProcessFile.mockResolvedValue({ filename: 'test.txt' });

  const processed = await processMessages({
    openai: {},
    client: { processedFileIds: new Set() },
    messages,
  });

  // The real citation becomes "^1^" and a footnote naming the mocked file is appended.
  expect(processed.text).toBe(
    'This is a test ^1^ with pre-existing citation-like text. Here\'s a real citation^1^.\n\n^1.^ test.txt',
  );
  expect(processed.edited).toBe(true);
});
|
||||||
|
|
||||||
|
test('handles FILE_PATH annotation type', async () => {
  // A single file_path annotation covering the "[file_path]" placeholder (indices 21-32).
  const filePathAnnotation = {
    type: 'file_path',
    text: '[file_path]',
    start_index: 21,
    end_index: 32,
    file_path: {
      file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
    },
  };

  const messages = [
    {
      content: [
        {
          type: 'text',
          text: {
            value: 'Here is a file path: [file_path]',
            annotations: [filePathAnnotation],
          },
        },
      ],
      created_at: 1,
    },
  ];

  const resolvedFile = {
    filename: 'test.txt',
    filepath: '/path/to/test.txt',
  };
  retrieveAndProcessFile.mockResolvedValue(resolvedFile);

  const processed = await processMessages({
    openai: {},
    client: { processedFileIds: new Set() },
    messages,
  });

  // The placeholder is swapped for the mocked filepath; no footnote is expected.
  expect(processed.text).toBe('Here is a file path: /path/to/test.txt');
  expect(processed.edited).toBe(true);
});
|
||||||
|
|
||||||
|
test('handles FILE_CITATION annotation type', async () => {
  // A single file_citation annotation covering the "[citation]" placeholder (indices 20-30).
  const citationAnnotation = {
    type: 'file_citation',
    text: '[citation]',
    start_index: 20,
    end_index: 30,
    file_citation: {
      file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
    },
  };

  const messages = [
    {
      content: [
        {
          type: 'text',
          text: {
            value: 'Here is a citation: [citation]',
            annotations: [citationAnnotation],
          },
        },
      ],
      created_at: 1,
    },
  ];

  retrieveAndProcessFile.mockResolvedValue({ filename: 'test.txt' });

  const processed = await processMessages({
    openai: {},
    client: { processedFileIds: new Set() },
    messages,
  });

  // The placeholder becomes "^1^" and the mocked filename is listed as footnote 1.
  expect(processed.text).toBe('Here is a citation: ^1^\n\n^1.^ test.txt');
  expect(processed.edited).toBe(true);
});
|
||||||
|
|
||||||
|
test('handles multiple annotation types in a single message', async () => {
  // One file_path annotation followed by two file_citation annotations, each
  // pointing at a different file id.
  const annotations = [
    {
      type: 'file_path',
      text: '[file_path]',
      start_index: 11,
      end_index: 22,
      file_path: {
        file_id: 'file-XXXXXXXXXXXXXXXX1',
      },
    },
    {
      type: 'file_citation',
      text: '[citation1]',
      start_index: 34,
      end_index: 45,
      file_citation: {
        file_id: 'file-XXXXXXXXXXXXXXXX2',
      },
    },
    {
      type: 'file_citation',
      text: '[citation2]',
      start_index: 65,
      end_index: 76,
      file_citation: {
        file_id: 'file-XXXXXXXXXXXXXXXX3',
      },
    },
  ];

  const messages = [
    {
      content: [
        {
          type: 'text',
          text: {
            value:
              'File path: [file_path]. Citation: [citation1]. Another citation: [citation2].',
            annotations,
          },
        },
      ],
      created_at: 1,
    },
  ];

  // Queue one mocked result per lookup. The expected text below implies they are
  // consumed in annotation order (file1 -> path, file2/file3 -> citations).
  retrieveAndProcessFile.mockResolvedValueOnce({
    filename: 'file1.txt',
    filepath: '/path/to/file1.txt',
  });
  retrieveAndProcessFile.mockResolvedValueOnce({ filename: 'file2.txt' });
  retrieveAndProcessFile.mockResolvedValueOnce({ filename: 'file3.txt' });

  const processed = await processMessages({
    openai: {},
    client: { processedFileIds: new Set() },
    messages,
  });

  // The file path is inlined; citations are numbered from 1 and listed as footnotes.
  expect(processed.text).toBe(
    'File path: /path/to/file1.txt. Citation: ^1^. Another citation: ^2^.\n\n^1.^ file2.txt\n^2.^ file3.txt',
  );
  expect(processed.edited).toBe(true);
});
|
||||||
|
|
||||||
|
test('handles annotation processing failure', async () => {
  // A well-formed citation annotation whose file lookup is made to reject.
  const failingCitation = {
    type: 'file_citation',
    text: '[citation]',
    start_index: 25,
    end_index: 35,
    file_citation: {
      file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
    },
  };

  const originalText = 'This citation will fail: [citation]';

  const messages = [
    {
      content: [
        {
          type: 'text',
          text: {
            value: originalText,
            annotations: [failingCitation],
          },
        },
      ],
      created_at: 1,
    },
  ];

  retrieveAndProcessFile.mockRejectedValue(new Error('File not found'));

  const processed = await processMessages({
    openai: {},
    client: { processedFileIds: new Set() },
    messages,
  });

  // When the lookup rejects, the text must come back untouched and unedited.
  expect(processed.text).toBe('This citation will fail: [citation]');
  expect(processed.edited).toBe(false);
});
|
||||||
|
|
||||||
|
test('handles multiple FILE_PATH annotations with sandbox links', async () => {
  // Identifiers and download URLs shared by the fixture, the mock and the lookups.
  const FILE_ID_1 = 'file-XXXXXXXXXXXXXXXXXXXX';
  const FILE_ID_2 = 'file-YYYYYYYYYYYYYYYYYYYY';
  const FILE_ID_3 = 'file-ZZZZZZZZZZZZZZZZZZZZ';

  const FILE_URL_1 =
    'https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-XXXXXXXXXXXXXXXXXXXX/dummy_data1.csv';
  const FILE_URL_2 =
    'https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-YYYYYYYYYYYYYYYYYYYY/dummy_data2.csv';
  const FILE_URL_3 =
    'https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-ZZZZZZZZZZZZZZZZZZZZ/dummy_data3.csv';

  // Builds a file_path annotation for one "sandbox:" link in the message text.
  const sandboxAnnotation = (text, start_index, end_index, file_id) => ({
    type: 'file_path',
    text,
    start_index,
    end_index,
    file_path: {
      file_id,
    },
  });

  // Builds a code_interpreter attachment entry; a fresh object is created per call.
  const codeInterpreterAttachment = (file_id) => ({
    file_id,
    tools: [
      {
        type: 'code_interpreter',
      },
    ],
  });

  // Builds one entry of the message's `files` list; only the per-file fields vary.
  const outputFile = ({ id, filename, bytes, created_at, filepath }) => ({
    object: 'file',
    id,
    purpose: 'assistants_output',
    filename,
    bytes,
    created_at,
    status: 'processed',
    status_details: null,
    type: 'text/csv',
    file_id: id,
    filepath,
    usage: 1,
    user: 'XXXXXXXXXXXXXXXXXXXX',
    context: 'assistants_output',
    source: 'openai',
    model: 'gpt-4o-mini',
  });

  const messages = [
    {
      id: 'msg_XXXXXXXXXXXXXXXXXXXX',
      object: 'thread.message',
      created_at: 1722983745,
      assistant_id: 'asst_XXXXXXXXXXXXXXXXXXXX',
      thread_id: 'thread_XXXXXXXXXXXXXXXXXXXX',
      run_id: 'run_XXXXXXXXXXXXXXXXXXXX',
      status: 'completed',
      incomplete_details: null,
      incomplete_at: null,
      completed_at: 1722983747,
      role: 'assistant',
      content: [
        {
          type: 'text',
          text: {
            value:
              'I have generated three dummy CSV files for you. You can download them using the links below:\n\n1. [Download Dummy Data 1](sandbox:/mnt/data/dummy_data1.csv)\n2. [Download Dummy Data 2](sandbox:/mnt/data/dummy_data2.csv)\n3. [Download Dummy Data 3](sandbox:/mnt/data/dummy_data3.csv)',
            annotations: [
              sandboxAnnotation('sandbox:/mnt/data/dummy_data1.csv', 121, 154, FILE_ID_1),
              sandboxAnnotation('sandbox:/mnt/data/dummy_data2.csv', 183, 216, FILE_ID_2),
              sandboxAnnotation('sandbox:/mnt/data/dummy_data3.csv', 245, 278, FILE_ID_3),
            ],
          },
        },
      ],
      attachments: [
        codeInterpreterAttachment(FILE_ID_1),
        codeInterpreterAttachment(FILE_ID_2),
        codeInterpreterAttachment(FILE_ID_3),
      ],
      metadata: {},
      files: [
        outputFile({
          id: FILE_ID_1,
          filename: 'dummy_data1.csv',
          bytes: 1925,
          created_at: 1722983746,
          filepath: FILE_URL_1,
        }),
        outputFile({
          id: FILE_ID_2,
          filename: 'dummy_data2.csv',
          bytes: 4221,
          created_at: 1722983746,
          filepath: FILE_URL_2,
        }),
        outputFile({
          id: FILE_ID_3,
          filename: 'dummy_data3.csv',
          bytes: 3534,
          created_at: 1722983747,
          filepath: FILE_URL_3,
        }),
      ],
    },
  ];

  const client = {
    processedFileIds: new Set(),
  };

  // Resolve each lookup by file id; the table is rebuilt per call so every
  // invocation hands back a fresh object.
  retrieveAndProcessFile.mockImplementation(({ file_id }) => {
    const filesById = {
      [FILE_ID_1]: {
        filename: 'dummy_data1.csv',
        filepath: FILE_URL_1,
      },
      [FILE_ID_2]: {
        filename: 'dummy_data2.csv',
        filepath: FILE_URL_2,
      },
      [FILE_ID_3]: {
        filename: 'dummy_data3.csv',
        filepath: FILE_URL_3,
      },
    };

    return Promise.resolve(filesById[file_id]);
  });

  const processed = await processMessages({ openai: {}, client, messages });

  // Every "sandbox:" link target is replaced by the matching file's download URL.
  expect(processed.text).toBe(
    'I have generated three dummy CSV files for you. You can download them using the links below:\n\n1. [Download Dummy Data 1](https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-XXXXXXXXXXXXXXXXXXXX/dummy_data1.csv)\n2. [Download Dummy Data 2](https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-YYYYYYYYYYYYYYYYYYYY/dummy_data2.csv)\n3. [Download Dummy Data 3](https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-ZZZZZZZZZZZZZZZZZZZZ/dummy_data3.csv)',
  );
  expect(processed.edited).toBe(true);
});
|
||||||
|
});
|
||||||
Loading…
Add table
Add a link
Reference in a new issue