diff --git a/api/app/clients/GoogleClient.js b/api/app/clients/GoogleClient.js index 225d73d935..2dd921c3ce 100644 --- a/api/app/clients/GoogleClient.js +++ b/api/app/clients/GoogleClient.js @@ -626,11 +626,11 @@ class GoogleClient extends BaseClient { const { onProgress, abortController } = options; const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE; const { messages: _messages, context, examples: _examples } = instances?.[0] ?? {}; - + let examples; - + let clientOptions = { ...parameters, maxRetries: 2 }; - + if (this.project_id) { clientOptions['authOptions'] = { credentials: { @@ -639,16 +639,16 @@ class GoogleClient extends BaseClient { projectId: this.project_id, }; } - + if (!parameters) { clientOptions = { ...clientOptions, ...this.modelOptions }; } - + if (this.isGenerativeModel && !this.project_id) { clientOptions.modelName = clientOptions.model; delete clientOptions.model; } - + if (_examples && _examples.length) { examples = _examples .map((ex) => { @@ -662,26 +662,26 @@ class GoogleClient extends BaseClient { }; }) .filter((ex) => ex); - + clientOptions.examples = examples; } - + const model = this.createLLM(clientOptions); - + let reply = ''; const messages = this.isTextModel ? _payload.trim() : _messages; - + if (!this.isVisionModel && context && messages?.length > 0) { messages.unshift(new SystemMessage(context)); } - + const modelName = clientOptions.modelName ?? clientOptions.model ?? ''; if (modelName?.includes('1.5') && !this.project_id) { const client = model; const requestOptions = { contents: _payload, }; - + if (this.options?.promptPrefix?.length) { requestOptions.systemInstruction = { parts: [ @@ -691,9 +691,9 @@ class GoogleClient extends BaseClient { ], }; } - + requestOptions.safetySettings = _payload.safetySettings; - + const delay = modelName.includes('flash') ? 8 : 14; const result = await client.generateContentStream(requestOptions); for await (const chunk of result.stream) { @@ -706,15 +706,15 @@ class GoogleClient extends BaseClient { } return reply; } - + const stream = await model.stream(messages, { signal: abortController.signal, timeout: 7000, safetySettings: _payload.safetySettings, }); - + let delay = this.options.streamRate || 8; - + if (!this.options.streamRate) { if (this.isGenerativeModel) { delay = 12; @@ -723,7 +723,7 @@ class GoogleClient extends BaseClient { delay = 5; } } - + for await (const chunk of stream) { const chunkText = chunk?.content ?? chunk; await this.generateTextStream(chunkText, onProgress, { @@ -731,7 +731,7 @@ class GoogleClient extends BaseClient { }); reply += chunkText; } - + return reply; } @@ -869,17 +869,18 @@ class GoogleClient extends BaseClient { async sendCompletion(payload, opts = {}) { payload.safetySettings = this.getSafetySettings(); - + let reply = ''; reply = await this.getCompletion(payload, opts); return reply.trim(); } - + getSafetySettings() { return [ { category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT', - threshold: process.env.GOOGLE_SAFETY_SEXUALLY_EXPLICIT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED', + threshold: + process.env.GOOGLE_SAFETY_SEXUALLY_EXPLICIT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED', }, { category: 'HARM_CATEGORY_HATE_SPEECH', @@ -891,7 +892,8 @@ class GoogleClient extends BaseClient { }, { category: 'HARM_CATEGORY_DANGEROUS_CONTENT', - threshold: process.env.GOOGLE_SAFETY_DANGEROUS_CONTENT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED', + threshold: + process.env.GOOGLE_SAFETY_DANGEROUS_CONTENT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED', }, ]; } diff --git a/api/app/clients/output_parsers/addImages.js b/api/app/clients/output_parsers/addImages.js index ec04bcac86..7bef60259c 100644 --- a/api/app/clients/output_parsers/addImages.js +++ b/api/app/clients/output_parsers/addImages.js @@ -60,10 +60,10 @@ function addImages(intermediateSteps, responseMessage) { if (!observation || !observation.includes('![')) { return; } - const observedImagePath = observation.match(/!\[.*\]\([^)]*\)/g); + const observedImagePath = observation.match(/!\[[^(]*\]\([^)]*\)/g); if (observedImagePath && !responseMessage.text.includes(observedImagePath[0])) { - responseMessage.text += '\n' + observation; - logger.debug('[addImages] added image from intermediateSteps:', observation); + responseMessage.text += '\n' + observedImagePath[0]; + logger.debug('[addImages] added image from intermediateSteps:', observedImagePath[0]); } }); } diff --git a/api/app/clients/output_parsers/addImages.spec.js b/api/app/clients/output_parsers/addImages.spec.js index eb4d87d65a..7c5a04137e 100644 --- a/api/app/clients/output_parsers/addImages.spec.js +++ b/api/app/clients/output_parsers/addImages.spec.js @@ -81,4 +81,62 @@ describe('addImages', () => { addImages(intermediateSteps, responseMessage); expect(responseMessage.text).toBe(`${originalText}\n${imageMarkdown}`); }); + + it('should extract only image markdowns when there is text between them', () => { + const markdownWithTextBetweenImages = ` + ![image1](/images/image1.png) + Some text between images that should not be included. + ![image2](/images/image2.png) + More text that should be ignored. + ![image3](/images/image3.png) + `; + intermediateSteps.push({ observation: markdownWithTextBetweenImages }); + addImages(intermediateSteps, responseMessage); + expect(responseMessage.text).toBe('\n![image1](/images/image1.png)'); + }); + + it('should only return the first image when multiple images are present', () => { + const markdownWithMultipleImages = ` + ![image1](/images/image1.png) + ![image2](/images/image2.png) + ![image3](/images/image3.png) + `; + intermediateSteps.push({ observation: markdownWithMultipleImages }); + addImages(intermediateSteps, responseMessage); + expect(responseMessage.text).toBe('\n![image1](/images/image1.png)'); + }); + + it('should not include any text or metadata surrounding the image markdown', () => { + const markdownWithMetadata = ` + Title: Test Document + Author: John Doe + ![image1](/images/image1.png) + Some content after the image. + Vector values: [0.1, 0.2, 0.3] + `; + intermediateSteps.push({ observation: markdownWithMetadata }); + addImages(intermediateSteps, responseMessage); + expect(responseMessage.text).toBe('\n![image1](/images/image1.png)'); + }); + + it('should handle complex markdown with multiple images and only return the first one', () => { + const complexMarkdown = ` + # Document Title + + ## Section 1 + Here's some text with an embedded image: + ![image1](/images/image1.png) + + ## Section 2 + More text here... + ![image2](/images/image2.png) + + ### Subsection + Even more content + ![image3](/images/image3.png) + `; + intermediateSteps.push({ observation: complexMarkdown }); + addImages(intermediateSteps, responseMessage); + expect(responseMessage.text).toBe('\n![image1](/images/image1.png)'); + }); });