mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 17:00:15 +01:00
🖼️ fix: correct image extraction (#3538)
* fix: regex issue extracting text with images in markdown
* fix: update addImages function to include only the first observed image path in the response message
* ci: tests for addImages function: correct image extraction
* fix(GoogleClient): linting

---------

Co-authored-by: Dongwoo Jeong <dongwoo.jeong@lge.com>
Co-authored-by: Dongwoo Jeong <dongwoo@duck.com>
This commit is contained in:
parent
389b2a6cab
commit
5baa95bd9a
3 changed files with 86 additions and 26 deletions
|
|
@ -626,11 +626,11 @@ class GoogleClient extends BaseClient {
|
||||||
const { onProgress, abortController } = options;
|
const { onProgress, abortController } = options;
|
||||||
const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
|
const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
|
||||||
const { messages: _messages, context, examples: _examples } = instances?.[0] ?? {};
|
const { messages: _messages, context, examples: _examples } = instances?.[0] ?? {};
|
||||||
|
|
||||||
let examples;
|
let examples;
|
||||||
|
|
||||||
let clientOptions = { ...parameters, maxRetries: 2 };
|
let clientOptions = { ...parameters, maxRetries: 2 };
|
||||||
|
|
||||||
if (this.project_id) {
|
if (this.project_id) {
|
||||||
clientOptions['authOptions'] = {
|
clientOptions['authOptions'] = {
|
||||||
credentials: {
|
credentials: {
|
||||||
|
|
@ -639,16 +639,16 @@ class GoogleClient extends BaseClient {
|
||||||
projectId: this.project_id,
|
projectId: this.project_id,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!parameters) {
|
if (!parameters) {
|
||||||
clientOptions = { ...clientOptions, ...this.modelOptions };
|
clientOptions = { ...clientOptions, ...this.modelOptions };
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this.isGenerativeModel && !this.project_id) {
|
if (this.isGenerativeModel && !this.project_id) {
|
||||||
clientOptions.modelName = clientOptions.model;
|
clientOptions.modelName = clientOptions.model;
|
||||||
delete clientOptions.model;
|
delete clientOptions.model;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (_examples && _examples.length) {
|
if (_examples && _examples.length) {
|
||||||
examples = _examples
|
examples = _examples
|
||||||
.map((ex) => {
|
.map((ex) => {
|
||||||
|
|
@ -662,26 +662,26 @@ class GoogleClient extends BaseClient {
|
||||||
};
|
};
|
||||||
})
|
})
|
||||||
.filter((ex) => ex);
|
.filter((ex) => ex);
|
||||||
|
|
||||||
clientOptions.examples = examples;
|
clientOptions.examples = examples;
|
||||||
}
|
}
|
||||||
|
|
||||||
const model = this.createLLM(clientOptions);
|
const model = this.createLLM(clientOptions);
|
||||||
|
|
||||||
let reply = '';
|
let reply = '';
|
||||||
const messages = this.isTextModel ? _payload.trim() : _messages;
|
const messages = this.isTextModel ? _payload.trim() : _messages;
|
||||||
|
|
||||||
if (!this.isVisionModel && context && messages?.length > 0) {
|
if (!this.isVisionModel && context && messages?.length > 0) {
|
||||||
messages.unshift(new SystemMessage(context));
|
messages.unshift(new SystemMessage(context));
|
||||||
}
|
}
|
||||||
|
|
||||||
const modelName = clientOptions.modelName ?? clientOptions.model ?? '';
|
const modelName = clientOptions.modelName ?? clientOptions.model ?? '';
|
||||||
if (modelName?.includes('1.5') && !this.project_id) {
|
if (modelName?.includes('1.5') && !this.project_id) {
|
||||||
const client = model;
|
const client = model;
|
||||||
const requestOptions = {
|
const requestOptions = {
|
||||||
contents: _payload,
|
contents: _payload,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (this.options?.promptPrefix?.length) {
|
if (this.options?.promptPrefix?.length) {
|
||||||
requestOptions.systemInstruction = {
|
requestOptions.systemInstruction = {
|
||||||
parts: [
|
parts: [
|
||||||
|
|
@ -691,9 +691,9 @@ class GoogleClient extends BaseClient {
|
||||||
],
|
],
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
requestOptions.safetySettings = _payload.safetySettings;
|
requestOptions.safetySettings = _payload.safetySettings;
|
||||||
|
|
||||||
const delay = modelName.includes('flash') ? 8 : 14;
|
const delay = modelName.includes('flash') ? 8 : 14;
|
||||||
const result = await client.generateContentStream(requestOptions);
|
const result = await client.generateContentStream(requestOptions);
|
||||||
for await (const chunk of result.stream) {
|
for await (const chunk of result.stream) {
|
||||||
|
|
@ -706,15 +706,15 @@ class GoogleClient extends BaseClient {
|
||||||
}
|
}
|
||||||
return reply;
|
return reply;
|
||||||
}
|
}
|
||||||
|
|
||||||
const stream = await model.stream(messages, {
|
const stream = await model.stream(messages, {
|
||||||
signal: abortController.signal,
|
signal: abortController.signal,
|
||||||
timeout: 7000,
|
timeout: 7000,
|
||||||
safetySettings: _payload.safetySettings,
|
safetySettings: _payload.safetySettings,
|
||||||
});
|
});
|
||||||
|
|
||||||
let delay = this.options.streamRate || 8;
|
let delay = this.options.streamRate || 8;
|
||||||
|
|
||||||
if (!this.options.streamRate) {
|
if (!this.options.streamRate) {
|
||||||
if (this.isGenerativeModel) {
|
if (this.isGenerativeModel) {
|
||||||
delay = 12;
|
delay = 12;
|
||||||
|
|
@ -723,7 +723,7 @@ class GoogleClient extends BaseClient {
|
||||||
delay = 5;
|
delay = 5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for await (const chunk of stream) {
|
for await (const chunk of stream) {
|
||||||
const chunkText = chunk?.content ?? chunk;
|
const chunkText = chunk?.content ?? chunk;
|
||||||
await this.generateTextStream(chunkText, onProgress, {
|
await this.generateTextStream(chunkText, onProgress, {
|
||||||
|
|
@ -731,7 +731,7 @@ class GoogleClient extends BaseClient {
|
||||||
});
|
});
|
||||||
reply += chunkText;
|
reply += chunkText;
|
||||||
}
|
}
|
||||||
|
|
||||||
return reply;
|
return reply;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -869,17 +869,18 @@ class GoogleClient extends BaseClient {
|
||||||
|
|
||||||
async sendCompletion(payload, opts = {}) {
|
async sendCompletion(payload, opts = {}) {
|
||||||
payload.safetySettings = this.getSafetySettings();
|
payload.safetySettings = this.getSafetySettings();
|
||||||
|
|
||||||
let reply = '';
|
let reply = '';
|
||||||
reply = await this.getCompletion(payload, opts);
|
reply = await this.getCompletion(payload, opts);
|
||||||
return reply.trim();
|
return reply.trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
getSafetySettings() {
|
getSafetySettings() {
|
||||||
return [
|
return [
|
||||||
{
|
{
|
||||||
category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT',
|
category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT',
|
||||||
threshold: process.env.GOOGLE_SAFETY_SEXUALLY_EXPLICIT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
|
threshold:
|
||||||
|
process.env.GOOGLE_SAFETY_SEXUALLY_EXPLICIT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
category: 'HARM_CATEGORY_HATE_SPEECH',
|
category: 'HARM_CATEGORY_HATE_SPEECH',
|
||||||
|
|
@ -891,7 +892,8 @@ class GoogleClient extends BaseClient {
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
category: 'HARM_CATEGORY_DANGEROUS_CONTENT',
|
category: 'HARM_CATEGORY_DANGEROUS_CONTENT',
|
||||||
threshold: process.env.GOOGLE_SAFETY_DANGEROUS_CONTENT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
|
threshold:
|
||||||
|
process.env.GOOGLE_SAFETY_DANGEROUS_CONTENT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -60,10 +60,10 @@ function addImages(intermediateSteps, responseMessage) {
|
||||||
if (!observation || !observation.includes('![')) {
|
if (!observation || !observation.includes('![')) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const observedImagePath = observation.match(/!\[.*\]\([^)]*\)/g);
|
const observedImagePath = observation.match(/!\[[^(]*\]\([^)]*\)/g);
|
||||||
if (observedImagePath && !responseMessage.text.includes(observedImagePath[0])) {
|
if (observedImagePath && !responseMessage.text.includes(observedImagePath[0])) {
|
||||||
responseMessage.text += '\n' + observation;
|
responseMessage.text += '\n' + observedImagePath[0];
|
||||||
logger.debug('[addImages] added image from intermediateSteps:', observation);
|
logger.debug('[addImages] added image from intermediateSteps:', observedImagePath[0]);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -81,4 +81,62 @@ describe('addImages', () => {
|
||||||
addImages(intermediateSteps, responseMessage);
|
addImages(intermediateSteps, responseMessage);
|
||||||
expect(responseMessage.text).toBe(`${originalText}\n${imageMarkdown}`);
|
expect(responseMessage.text).toBe(`${originalText}\n${imageMarkdown}`);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should extract only image markdowns when there is text between them', () => {
|
||||||
|
const markdownWithTextBetweenImages = `
|
||||||
|

|
||||||
|
Some text between images that should not be included.
|
||||||
|

|
||||||
|
More text that should be ignored.
|
||||||
|

|
||||||
|
`;
|
||||||
|
intermediateSteps.push({ observation: markdownWithTextBetweenImages });
|
||||||
|
addImages(intermediateSteps, responseMessage);
|
||||||
|
expect(responseMessage.text).toBe('\n');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should only return the first image when multiple images are present', () => {
|
||||||
|
const markdownWithMultipleImages = `
|
||||||
|

|
||||||
|

|
||||||
|

|
||||||
|
`;
|
||||||
|
intermediateSteps.push({ observation: markdownWithMultipleImages });
|
||||||
|
addImages(intermediateSteps, responseMessage);
|
||||||
|
expect(responseMessage.text).toBe('\n');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should not include any text or metadata surrounding the image markdown', () => {
|
||||||
|
const markdownWithMetadata = `
|
||||||
|
Title: Test Document
|
||||||
|
Author: John Doe
|
||||||
|

|
||||||
|
Some content after the image.
|
||||||
|
Vector values: [0.1, 0.2, 0.3]
|
||||||
|
`;
|
||||||
|
intermediateSteps.push({ observation: markdownWithMetadata });
|
||||||
|
addImages(intermediateSteps, responseMessage);
|
||||||
|
expect(responseMessage.text).toBe('\n');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should handle complex markdown with multiple images and only return the first one', () => {
|
||||||
|
const complexMarkdown = `
|
||||||
|
# Document Title
|
||||||
|
|
||||||
|
## Section 1
|
||||||
|
Here's some text with an embedded image:
|
||||||
|

|
||||||
|
|
||||||
|
## Section 2
|
||||||
|
More text here...
|
||||||
|

|
||||||
|
|
||||||
|
### Subsection
|
||||||
|
Even more content
|
||||||
|

|
||||||
|
`;
|
||||||
|
intermediateSteps.push({ observation: complexMarkdown });
|
||||||
|
addImages(intermediateSteps, responseMessage);
|
||||||
|
expect(responseMessage.text).toBe('\n');
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue