mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 08:50:15 +01:00
🖼️ fix: correct image extraction (#3538)
* fix: regex issue extracting text with images in markdown
* fix: update addImages function to include only the first observed image path in the response message
* ci: tests for addImages function: correct image extraction
* fix(GoogleClient): linting

---------

Co-authored-by: Dongwoo Jeong <dongwoo.jeong@lge.com>
Co-authored-by: Dongwoo Jeong <dongwoo@duck.com>
This commit is contained in:
parent
389b2a6cab
commit
5baa95bd9a
3 changed files with 86 additions and 26 deletions
|
|
@ -879,7 +879,8 @@ class GoogleClient extends BaseClient {
|
||||||
return [
|
return [
|
||||||
{
|
{
|
||||||
category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT',
|
category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT',
|
||||||
threshold: process.env.GOOGLE_SAFETY_SEXUALLY_EXPLICIT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
|
threshold:
|
||||||
|
process.env.GOOGLE_SAFETY_SEXUALLY_EXPLICIT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
category: 'HARM_CATEGORY_HATE_SPEECH',
|
category: 'HARM_CATEGORY_HATE_SPEECH',
|
||||||
|
|
@ -891,7 +892,8 @@ class GoogleClient extends BaseClient {
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
category: 'HARM_CATEGORY_DANGEROUS_CONTENT',
|
category: 'HARM_CATEGORY_DANGEROUS_CONTENT',
|
||||||
threshold: process.env.GOOGLE_SAFETY_DANGEROUS_CONTENT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
|
threshold:
|
||||||
|
process.env.GOOGLE_SAFETY_DANGEROUS_CONTENT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -60,10 +60,10 @@ function addImages(intermediateSteps, responseMessage) {
|
||||||
if (!observation || !observation.includes('![')) {
|
if (!observation || !observation.includes('![')) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const observedImagePath = observation.match(/!\[.*\]\([^)]*\)/g);
|
const observedImagePath = observation.match(/!\[[^(]*\]\([^)]*\)/g);
|
||||||
if (observedImagePath && !responseMessage.text.includes(observedImagePath[0])) {
|
if (observedImagePath && !responseMessage.text.includes(observedImagePath[0])) {
|
||||||
responseMessage.text += '\n' + observation;
|
responseMessage.text += '\n' + observedImagePath[0];
|
||||||
logger.debug('[addImages] added image from intermediateSteps:', observation);
|
logger.debug('[addImages] added image from intermediateSteps:', observedImagePath[0]);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -81,4 +81,62 @@ describe('addImages', () => {
|
||||||
addImages(intermediateSteps, responseMessage);
|
addImages(intermediateSteps, responseMessage);
|
||||||
expect(responseMessage.text).toBe(`${originalText}\n${imageMarkdown}`);
|
expect(responseMessage.text).toBe(`${originalText}\n${imageMarkdown}`);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should extract only image markdowns when there is text between them', () => {
|
||||||
|
const markdownWithTextBetweenImages = `
|
||||||
|

|
||||||
|
Some text between images that should not be included.
|
||||||
|

|
||||||
|
More text that should be ignored.
|
||||||
|

|
||||||
|
`;
|
||||||
|
intermediateSteps.push({ observation: markdownWithTextBetweenImages });
|
||||||
|
addImages(intermediateSteps, responseMessage);
|
||||||
|
expect(responseMessage.text).toBe('\n');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should only return the first image when multiple images are present', () => {
|
||||||
|
const markdownWithMultipleImages = `
|
||||||
|

|
||||||
|

|
||||||
|

|
||||||
|
`;
|
||||||
|
intermediateSteps.push({ observation: markdownWithMultipleImages });
|
||||||
|
addImages(intermediateSteps, responseMessage);
|
||||||
|
expect(responseMessage.text).toBe('\n');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should not include any text or metadata surrounding the image markdown', () => {
|
||||||
|
const markdownWithMetadata = `
|
||||||
|
Title: Test Document
|
||||||
|
Author: John Doe
|
||||||
|

|
||||||
|
Some content after the image.
|
||||||
|
Vector values: [0.1, 0.2, 0.3]
|
||||||
|
`;
|
||||||
|
intermediateSteps.push({ observation: markdownWithMetadata });
|
||||||
|
addImages(intermediateSteps, responseMessage);
|
||||||
|
expect(responseMessage.text).toBe('\n');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should handle complex markdown with multiple images and only return the first one', () => {
|
||||||
|
const complexMarkdown = `
|
||||||
|
# Document Title
|
||||||
|
|
||||||
|
## Section 1
|
||||||
|
Here's some text with an embedded image:
|
||||||
|

|
||||||
|
|
||||||
|
## Section 2
|
||||||
|
More text here...
|
||||||
|

|
||||||
|
|
||||||
|
### Subsection
|
||||||
|
Even more content
|
||||||
|

|
||||||
|
`;
|
||||||
|
intermediateSteps.push({ observation: complexMarkdown });
|
||||||
|
addImages(intermediateSteps, responseMessage);
|
||||||
|
expect(responseMessage.text).toBe('\n');
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue