LibreChat/api/server/utils/handleText.js
Danny Avila 0dbbf7de04
🔎 feat: Native Web Search with Citation References (#7516)
* WIP: search tool integration

* WIP: Add web search capabilities and API key management to agent actions

* WIP: web search capability to agent configuration and selection

* WIP: Add web search capability to backend agent configuration

* WIP: add web search option to default agent form values

* WIP: add attachments for web search

* feat: add plugin for processing web search citations

* WIP: first pass, Citation UI

* chore: remove console.log

* feat: Add AnimatedTabs component for tabbed UI functionality

* refactor: AnimatedTabs component with CSS animations and stable ID generation

* WIP example content

* feat: SearchContext for managing search results apart from MessageContext

* feat: Enhance AnimatedTabs with underline animation and state management

* WIP: first pass, Implement dynamic tab functionality in Sources component with search results integration

* fix: Update class names for improved styling in Sources and AnimatedTabs components

* feat: Improve styling and layout in Sources component with enhanced button and item designs

* feat: Refactor Sources component to integrate OGDialog for source display and improve layout

* style: Update background color in SourceItem and SourcesGroup components for improved visibility

* refactor: Sources component to enhance SourceItem structure and improve favicon handling

* style: Adjust font size of domain text in SourceItem for better readability

* feat: Add localization for citation source and details in CompositeCitation component

* style: add theming to Citation components

* feat: Enhance SourceItem component with dialog support and improved hovercard functionality

* feat: Add localization for sources tab and image alt text in Sources component

* style: Replace divs with spans for better semantic structure in CompositeCitation and Citation components

* refactor: Sources component to use useMemo for tab generation and improve performance

* chore: bump @librechat/agents to v2.4.318

* chore: update search result types

* fix: search results retrieval in ContentParts component, re-render attachments when expected

* feat: update sources style/types to use latest search result structure

* style: enhance Dialog (expanded) SourceItem component with link wrapping and improved styling

* style: update ImageItem component styling for improved title visibility

* refactor: remove SourceItemBase component and adjust SourceItem layout for improved styling

* chore: linting twcss order

* fix: prevent FileAttachment from rendering search attachments

* fix: append underscore to responseMessageId for unique identification to prevent mapping of previous latest message's attachments

* chore: remove unused parameter 'useSpecs' from loadTools function

* chore: twcss order

* WIP: WebSearch Tool UI

* refactor: add limit parameter to StackedFavicons for customizable source display

* refactor: optimize search results memoization by making more granular and separate conerns

* refactor: integrated StackedFavicons to WebSearch mid-run

* chore: bump @librechat/agents to expose handleToolCallChunks

* chore: use typedefs from dedicated file instead of defining them in AgentClient module

* WIP: first pass, search progress results

* refactor: move createOnSearchResults function to a dedicated search module

* chore: bump @librechat/agents to v2.4.320

* WIP: first pass, search results processed UX

* refactor: consolidate context variables in createOnSearchResults function

* chore: bump @librechat/agents to v2.4.321

* feat: add guidelines for web search tool response formatting in loadTools function

* feat: add isLast prop to Part component and update WebSearch logic for improved state handling

* style: update Hovercard styles for improved UI consistency

* feat: export FaviconImage component for improved accessibility in other modules

* refactor: export getCleanDomain function and use FaviconImage in Citation component for improved source representation

* refactor: implement SourceHovercard component for consistency and DRY compliance

* fix: replace <p> with <span> for snippet and title in SourceItem and SourceHovercard for consistency

* style: `not-prose`

* style: remove 'not-prose' class for consistency in SourceItem, Citation, and SourceHovercard components, adjust style classes

* refactor: `imageUrl` on hover and prevent duplicate sources

* refactor: enhance SourcesGroup dialog layout and improve source item presentation

* refactor: reorganize Web Components, save in same directory

* feat: add 'news' refType to refTypeMap for citation sources

* style: adjust Hovercard width for improved layout

* refactor: update tool usage guidelines for improved clarity and execution

* chore: linting

* feat: add Web Search badge with initial permissions and local storage logic

* feat: add webSearch support to interface and permissions schemas

* feat: implement Web Search API key management and localization updates

* feat: refactor Web Search API key handling and integrate new search API key form

* fix: remove unnecessary visibility state from FileAttachment component

* feat: update WebSearch component to use Globe icon and localized search label

* feat: enhance ApiKeyDialog with dropdown for reranker selection and update translations

* feat: implement dropdown menus for engine, scraper, and reranker selection in ApiKeyDialog

* chore: linting and add unknown instead of `any` type

* feat: refactor ApiKeyDialog and useAuthSearchTool for improved API key management

* refactor: update ocrSchema to use template literals for default apiKey and baseURL

* feat: add web search configuration and utility functions for environment variable extraction

* fix: ensure filepath is defined before checking its prefix in useAttachmentHandler

* feat: enhance web search functionality with improved configuration and environment variable extraction for authFields

* fix: update auth type in TPluginAction and TUpdateUserPlugins to use Partial<Record<string, string>>

* feat: implement web search authentication verification and enhance webSearchAuth structure

* feat: enhance ephemeral agent handling with new web search capability and type definition

* feat: enhance isEphemeralAgent function to include web search selection

* feat: refactor verifyWebSearchAuth to improve key handling and authentication checks

* feat: implement loadWebSearchAuth function for improved web search authentication handling

* feat: enhance web search authentication with new configuration options and refactor related types

* refactor: rename search engine to search provider and update related localization keys

* feat: update verifyWebSearchAuth to handle multiple authentication types and improve error handling

* feat: update ApiKeyDialog to accept authTypes prop and remove isUserProvided check

* feat: add tests for extractWebSearchEnvVars and loadWebSearchAuth functions

* feat: enhance loadWebSearchAuth to support specific service checks for providers, scrapers, and rerankers

* fix: update web search configuration key and adjust auth result handling in loadTools function

* feat: add new progress key for repeated web searching and update localization

* chore: bump @librechat/agents to 2.4.322

* feat: enhance loadTools function to include ISO time and improve search tool logging

* feat: update StackedFavicons to handle negative start index and improve citation attribution styling and text

* chore: update .gitignore to categorize AI-related files

* fix: mobile responsiveness of sources/citations hovercards

* feat: enhance source display with improved line clamping for better readability

* chore: bump @librechat/agents to v2.4.33

* feat: add handling for image sources in references mapping

* chore: bump librechat-data-provider version to 0.7.84

* chore: bump @librechat/agents version to 2.4.34

* fix: update auth handling to support multiple auth types in tools and allow key configuration in agent panel

* chore: remove redundant agent attribution text from search form

* fix: web search auth uninstall

* refactor: convert CheckboxButton to a forwardRef component and update setValue callback signature

* feat: add triggerRef prop to ApiKeyDialog components for improved dialog control

* feat: integrate triggerRef in CodeInterpreter and WebSearch components for enhanced dialog management

* feat: enhance ApiKeyDialog with additional links for Firecrawl and Jina API key guidance

* feat: implement web search configuration handling in ApiKeyDialog and add tests for dropdown visibility

* fix: update webSearchConfig reference in config route for correct payload assignment

* feat: update ApiKeyDialog to conditionally render sections based on authTypes and modify loadWebSearchAuth to correctly categorize authentication types

* feat: refactor ApiKeyDialog and related tests to use SearchCategories and RerankerTypes enums and remove nested ternaries

* refactor: move ThinkingButton rendering to improve layout consistency in ContentParts

* feat: integrate search context into Markdown component to conditionally include unicodeCitation plugin

* chore: bump @librechat/agents to v2.4.35

* chore: remove unused 18n key

* ci: add WEB_SEARCH permission testing and update AppService tests for new webSearch configuration

* ci: add more comprehensive tests for loadWebSearchAuth to validate authentication handling and authTypes structure

* chore: remove debugging console log from web.spec.ts to clean up test output
2025-05-23 17:40:40 -04:00

274 lines
7.5 KiB
JavaScript

const path = require('path');
const crypto = require('crypto');
const {
Capabilities,
EModelEndpoint,
isAgentsEndpoint,
AgentCapabilities,
isAssistantsEndpoint,
defaultRetrievalModels,
defaultAssistantsVersion,
} = require('librechat-data-provider');
const { Providers } = require('@librechat/agents');
const partialRight = require('lodash/partialRight');
const { sendMessage } = require('./streamResponse');
/** Helper function to escape special characters in regex
* @param {string} string - The string to escape.
* @returns {string} The escaped string.
*/
function escapeRegExp(string) {
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
const addSpaceIfNeeded = (text) => (text.length > 0 && !text.endsWith(' ') ? text + ' ' : text);
const base = { message: true, initial: true };
const createOnProgress = (
{ generation = '', onProgress: _onProgress } = {
generation: '',
onProgress: null,
},
) => {
let i = 0;
let tokens = addSpaceIfNeeded(generation);
const basePayload = Object.assign({}, base, { text: tokens || '' });
const progressCallback = (chunk, { res, ...rest }) => {
basePayload.text = basePayload.text + chunk;
const payload = Object.assign({}, basePayload, rest);
sendMessage(res, payload);
if (_onProgress) {
_onProgress(payload);
}
if (i === 0) {
basePayload.initial = false;
}
i++;
};
const sendIntermediateMessage = (res, payload, extraTokens = '') => {
basePayload.text = basePayload.text + extraTokens;
const message = Object.assign({}, basePayload, payload);
sendMessage(res, message);
if (i === 0) {
basePayload.initial = false;
}
i++;
};
const onProgress = (opts) => {
return partialRight(progressCallback, opts);
};
const getPartialText = () => {
return basePayload.text;
};
return { onProgress, getPartialText, sendIntermediateMessage };
};
const handleText = async (response) => {
let { text } = response;
response.text = text;
return text;
};
const isObject = (item) => item && typeof item === 'object' && !Array.isArray(item);
const getString = (input) => (isObject(input) ? JSON.stringify(input) : input);
function formatSteps(steps) {
let output = '';
for (let i = 0; i < steps.length; i++) {
const step = steps[i];
const actionInput = getString(step.action.toolInput);
const observation = step.observation;
if (actionInput === 'N/A' || observation?.trim()?.length === 0) {
continue;
}
output += `Input: ${actionInput}\nOutput: ${getString(observation)}`;
if (steps.length > 1 && i !== steps.length - 1) {
output += '\n---\n';
}
}
return output;
}
function formatAction(action) {
const formattedAction = {
plugin: action.tool,
input: getString(action.toolInput),
thought: action.log.includes('Thought: ')
? action.log.split('\n')[0].replace('Thought: ', '')
: action.log.split('\n')[0],
};
formattedAction.thought = getString(formattedAction.thought);
if (action.tool.toLowerCase() === 'self-reflection' || formattedAction.plugin === 'N/A') {
formattedAction.inputStr = `{\n\tthought: ${formattedAction.input}${
!formattedAction.thought.includes(formattedAction.input)
? ' - ' + formattedAction.thought
: ''
}\n}`;
formattedAction.inputStr = formattedAction.inputStr.replace('N/A - ', '');
} else {
const hasThought = formattedAction.thought.length > 0;
const thought = hasThought ? `\n\tthought: ${formattedAction.thought}` : '';
formattedAction.inputStr = `{\n\tplugin: ${formattedAction.plugin}\n\tinput: ${formattedAction.input}\n${thought}}`;
}
return formattedAction;
}
/**
* Checks if the given value is truthy by being either the boolean `true` or a string
* that case-insensitively matches 'true'.
*
* @function
* @param {string|boolean|null|undefined} value - The value to check.
* @returns {boolean} Returns `true` if the value is the boolean `true` or a case-insensitive
* match for the string 'true', otherwise returns `false`.
* @example
*
* isEnabled("True"); // returns true
* isEnabled("TRUE"); // returns true
* isEnabled(true); // returns true
* isEnabled("false"); // returns false
* isEnabled(false); // returns false
* isEnabled(null); // returns false
* isEnabled(); // returns false
*/
function isEnabled(value) {
if (typeof value === 'boolean') {
return value;
}
if (typeof value === 'string') {
return value.toLowerCase().trim() === 'true';
}
return false;
}
/**
* Checks if the provided value is 'user_provided'.
*
* @param {string} value - The value to check.
* @returns {boolean} - Returns true if the value is 'user_provided', otherwise false.
*/
const isUserProvided = (value) => value === 'user_provided';
/**
* Generate the configuration for a given key and base URL.
* @param {string} key
* @param {string} [baseURL]
* @param {string} [endpoint]
* @returns {boolean | { userProvide: boolean, userProvideURL?: boolean }}
*/
function generateConfig(key, baseURL, endpoint) {
if (!key) {
return false;
}
/** @type {{ userProvide: boolean, userProvideURL?: boolean }} */
const config = { userProvide: isUserProvided(key) };
if (baseURL) {
config.userProvideURL = isUserProvided(baseURL);
}
const assistants = isAssistantsEndpoint(endpoint);
const agents = isAgentsEndpoint(endpoint);
if (assistants) {
config.retrievalModels = defaultRetrievalModels;
config.capabilities = [
Capabilities.code_interpreter,
Capabilities.image_vision,
Capabilities.retrieval,
Capabilities.actions,
Capabilities.tools,
];
}
if (agents) {
config.capabilities = [
AgentCapabilities.execute_code,
AgentCapabilities.file_search,
AgentCapabilities.web_search,
AgentCapabilities.artifacts,
AgentCapabilities.actions,
AgentCapabilities.tools,
AgentCapabilities.chain,
AgentCapabilities.ocr,
];
}
if (assistants && endpoint === EModelEndpoint.azureAssistants) {
config.version = defaultAssistantsVersion.azureAssistants;
} else if (assistants) {
config.version = defaultAssistantsVersion.assistants;
}
return config;
}
/**
* Normalize the endpoint name to system-expected value.
* @param {string} name
* @returns {string}
*/
function normalizeEndpointName(name = '') {
return name.toLowerCase() === Providers.OLLAMA ? Providers.OLLAMA : name;
}
/**
* Sanitize a filename by removing any directory components, replacing non-alphanumeric characters
* @param {string} inputName
* @returns {string}
*/
function sanitizeFilename(inputName) {
// Remove any directory components
let name = path.basename(inputName);
// Replace any non-alphanumeric characters except for '.' and '-'
name = name.replace(/[^a-zA-Z0-9.-]/g, '_');
// Ensure the name doesn't start with a dot (hidden file in Unix-like systems)
if (name.startsWith('.') || name === '') {
name = '_' + name;
}
// Limit the length of the filename
const MAX_LENGTH = 255;
if (name.length > MAX_LENGTH) {
const ext = path.extname(name);
const nameWithoutExt = path.basename(name, ext);
name =
nameWithoutExt.slice(0, MAX_LENGTH - ext.length - 7) +
'-' +
crypto.randomBytes(3).toString('hex') +
ext;
}
return name;
}
module.exports = {
isEnabled,
handleText,
formatSteps,
escapeRegExp,
formatAction,
isUserProvided,
generateConfig,
addSpaceIfNeeded,
createOnProgress,
sanitizeFilename,
normalizeEndpointName,
};