mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 00:40:14 +01:00
🔎 feat: Native Web Search with Citation References (#7516)
* WIP: search tool integration * WIP: Add web search capabilities and API key management to agent actions * WIP: web search capability to agent configuration and selection * WIP: Add web search capability to backend agent configuration * WIP: add web search option to default agent form values * WIP: add attachments for web search * feat: add plugin for processing web search citations * WIP: first pass, Citation UI * chore: remove console.log * feat: Add AnimatedTabs component for tabbed UI functionality * refactor: AnimatedTabs component with CSS animations and stable ID generation * WIP example content * feat: SearchContext for managing search results apart from MessageContext * feat: Enhance AnimatedTabs with underline animation and state management * WIP: first pass, Implement dynamic tab functionality in Sources component with search results integration * fix: Update class names for improved styling in Sources and AnimatedTabs components * feat: Improve styling and layout in Sources component with enhanced button and item designs * feat: Refactor Sources component to integrate OGDialog for source display and improve layout * style: Update background color in SourceItem and SourcesGroup components for improved visibility * refactor: Sources component to enhance SourceItem structure and improve favicon handling * style: Adjust font size of domain text in SourceItem for better readability * feat: Add localization for citation source and details in CompositeCitation component * style: add theming to Citation components * feat: Enhance SourceItem component with dialog support and improved hovercard functionality * feat: Add localization for sources tab and image alt text in Sources component * style: Replace divs with spans for better semantic structure in CompositeCitation and Citation components * refactor: Sources component to use useMemo for tab generation and improve performance * chore: bump @librechat/agents to v2.4.318 * chore: update search result types * 
fix: search results retrieval in ContentParts component, re-render attachments when expected * feat: update sources style/types to use latest search result structure * style: enhance Dialog (expanded) SourceItem component with link wrapping and improved styling * style: update ImageItem component styling for improved title visibility * refactor: remove SourceItemBase component and adjust SourceItem layout for improved styling * chore: linting twcss order * fix: prevent FileAttachment from rendering search attachments * fix: append underscore to responseMessageId for unique identification to prevent mapping of previous latest message's attachments * chore: remove unused parameter 'useSpecs' from loadTools function * chore: twcss order * WIP: WebSearch Tool UI * refactor: add limit parameter to StackedFavicons for customizable source display * refactor: optimize search results memoization by making more granular and separate conerns * refactor: integrated StackedFavicons to WebSearch mid-run * chore: bump @librechat/agents to expose handleToolCallChunks * chore: use typedefs from dedicated file instead of defining them in AgentClient module * WIP: first pass, search progress results * refactor: move createOnSearchResults function to a dedicated search module * chore: bump @librechat/agents to v2.4.320 * WIP: first pass, search results processed UX * refactor: consolidate context variables in createOnSearchResults function * chore: bump @librechat/agents to v2.4.321 * feat: add guidelines for web search tool response formatting in loadTools function * feat: add isLast prop to Part component and update WebSearch logic for improved state handling * style: update Hovercard styles for improved UI consistency * feat: export FaviconImage component for improved accessibility in other modules * refactor: export getCleanDomain function and use FaviconImage in Citation component for improved source representation * refactor: implement SourceHovercard component for consistency 
and DRY compliance * fix: replace <p> with <span> for snippet and title in SourceItem and SourceHovercard for consistency * style: `not-prose` * style: remove 'not-prose' class for consistency in SourceItem, Citation, and SourceHovercard components, adjust style classes * refactor: `imageUrl` on hover and prevent duplicate sources * refactor: enhance SourcesGroup dialog layout and improve source item presentation * refactor: reorganize Web Components, save in same directory * feat: add 'news' refType to refTypeMap for citation sources * style: adjust Hovercard width for improved layout * refactor: update tool usage guidelines for improved clarity and execution * chore: linting * feat: add Web Search badge with initial permissions and local storage logic * feat: add webSearch support to interface and permissions schemas * feat: implement Web Search API key management and localization updates * feat: refactor Web Search API key handling and integrate new search API key form * fix: remove unnecessary visibility state from FileAttachment component * feat: update WebSearch component to use Globe icon and localized search label * feat: enhance ApiKeyDialog with dropdown for reranker selection and update translations * feat: implement dropdown menus for engine, scraper, and reranker selection in ApiKeyDialog * chore: linting and add unknown instead of `any` type * feat: refactor ApiKeyDialog and useAuthSearchTool for improved API key management * refactor: update ocrSchema to use template literals for default apiKey and baseURL * feat: add web search configuration and utility functions for environment variable extraction * fix: ensure filepath is defined before checking its prefix in useAttachmentHandler * feat: enhance web search functionality with improved configuration and environment variable extraction for authFields * fix: update auth type in TPluginAction and TUpdateUserPlugins to use Partial<Record<string, string>> * feat: implement web search authentication 
verification and enhance webSearchAuth structure * feat: enhance ephemeral agent handling with new web search capability and type definition * feat: enhance isEphemeralAgent function to include web search selection * feat: refactor verifyWebSearchAuth to improve key handling and authentication checks * feat: implement loadWebSearchAuth function for improved web search authentication handling * feat: enhance web search authentication with new configuration options and refactor related types * refactor: rename search engine to search provider and update related localization keys * feat: update verifyWebSearchAuth to handle multiple authentication types and improve error handling * feat: update ApiKeyDialog to accept authTypes prop and remove isUserProvided check * feat: add tests for extractWebSearchEnvVars and loadWebSearchAuth functions * feat: enhance loadWebSearchAuth to support specific service checks for providers, scrapers, and rerankers * fix: update web search configuration key and adjust auth result handling in loadTools function * feat: add new progress key for repeated web searching and update localization * chore: bump @librechat/agents to 2.4.322 * feat: enhance loadTools function to include ISO time and improve search tool logging * feat: update StackedFavicons to handle negative start index and improve citation attribution styling and text * chore: update .gitignore to categorize AI-related files * fix: mobile responsiveness of sources/citations hovercards * feat: enhance source display with improved line clamping for better readability * chore: bump @librechat/agents to v2.4.33 * feat: add handling for image sources in references mapping * chore: bump librechat-data-provider version to 0.7.84 * chore: bump @librechat/agents version to 2.4.34 * fix: update auth handling to support multiple auth types in tools and allow key configuration in agent panel * chore: remove redundant agent attribution text from search form * fix: web search auth uninstall * 
refactor: convert CheckboxButton to a forwardRef component and update setValue callback signature * feat: add triggerRef prop to ApiKeyDialog components for improved dialog control * feat: integrate triggerRef in CodeInterpreter and WebSearch components for enhanced dialog management * feat: enhance ApiKeyDialog with additional links for Firecrawl and Jina API key guidance * feat: implement web search configuration handling in ApiKeyDialog and add tests for dropdown visibility * fix: update webSearchConfig reference in config route for correct payload assignment * feat: update ApiKeyDialog to conditionally render sections based on authTypes and modify loadWebSearchAuth to correctly categorize authentication types * feat: refactor ApiKeyDialog and related tests to use SearchCategories and RerankerTypes enums and remove nested ternaries * refactor: move ThinkingButton rendering to improve layout consistency in ContentParts * feat: integrate search context into Markdown component to conditionally include unicodeCitation plugin * chore: bump @librechat/agents to v2.4.35 * chore: remove unused 18n key * ci: add WEB_SEARCH permission testing and update AppService tests for new webSearch configuration * ci: add more comprehensive tests for loadWebSearchAuth to validate authentication handling and authTypes structure * chore: remove debugging console log from web.spec.ts to clean up test output
This commit is contained in:
parent
bf80cf30b3
commit
0dbbf7de04
73 changed files with 6366 additions and 2003 deletions
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "librechat-data-provider",
|
||||
"version": "0.7.83",
|
||||
"version": "0.7.84",
|
||||
"description": "data services for librechat apps",
|
||||
"main": "dist/index.js",
|
||||
"module": "dist/index.es.js",
|
||||
|
|
|
|||
899
packages/data-provider/specs/web.spec.ts
Normal file
899
packages/data-provider/specs/web.spec.ts
Normal file
|
|
@ -0,0 +1,899 @@
|
|||
import type {
|
||||
ScraperTypes,
|
||||
TCustomConfig,
|
||||
RerankerTypes,
|
||||
SearchProviders,
|
||||
TWebSearchConfig,
|
||||
} from '../src/config';
|
||||
import { webSearchAuth, loadWebSearchAuth, extractWebSearchEnvVars } from '../src/web';
|
||||
import { AuthType } from '../src/schemas';
|
||||
|
||||
// Mock the extractVariableName function
|
||||
jest.mock('../src/utils', () => ({
|
||||
extractVariableName: (value: string) => {
|
||||
if (!value || typeof value !== 'string') return null;
|
||||
const match = value.match(/^\${(.+)}$/);
|
||||
return match ? match[1] : null;
|
||||
},
|
||||
}));
|
||||
|
||||
describe('web.ts', () => {
|
||||
describe('extractWebSearchEnvVars', () => {
|
||||
it('should return empty array if config is undefined', () => {
|
||||
const result = extractWebSearchEnvVars({
|
||||
keys: ['serperApiKey', 'jinaApiKey'],
|
||||
config: undefined,
|
||||
});
|
||||
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
it('should extract environment variable names from config values', () => {
|
||||
const config: Partial<TWebSearchConfig> = {
|
||||
serperApiKey: '${SERPER_API_KEY}',
|
||||
jinaApiKey: '${JINA_API_KEY}',
|
||||
cohereApiKey: 'actual-api-key', // Not in env var format
|
||||
safeSearch: true,
|
||||
};
|
||||
|
||||
const result = extractWebSearchEnvVars({
|
||||
keys: ['serperApiKey', 'jinaApiKey', 'cohereApiKey'],
|
||||
config: config as TWebSearchConfig,
|
||||
});
|
||||
|
||||
expect(result).toEqual(['SERPER_API_KEY', 'JINA_API_KEY']);
|
||||
});
|
||||
|
||||
it('should only extract variables for keys that exist in the config', () => {
|
||||
const config: Partial<TWebSearchConfig> = {
|
||||
serperApiKey: '${SERPER_API_KEY}',
|
||||
// firecrawlApiKey is missing
|
||||
safeSearch: true,
|
||||
};
|
||||
|
||||
const result = extractWebSearchEnvVars({
|
||||
keys: ['serperApiKey', 'firecrawlApiKey'],
|
||||
config: config as TWebSearchConfig,
|
||||
});
|
||||
|
||||
expect(result).toEqual(['SERPER_API_KEY']);
|
||||
});
|
||||
});
|
||||
|
||||
describe('loadWebSearchAuth', () => {
|
||||
// Common test variables
|
||||
const userId = 'test-user-id';
|
||||
let mockLoadAuthValues: jest.Mock;
|
||||
let webSearchConfig: TCustomConfig['webSearch'];
|
||||
|
||||
beforeEach(() => {
|
||||
// Reset mocks before each test
|
||||
jest.clearAllMocks();
|
||||
|
||||
// Initialize the mock function
|
||||
mockLoadAuthValues = jest.fn();
|
||||
|
||||
// Initialize a basic webSearchConfig
|
||||
webSearchConfig = {
|
||||
serperApiKey: '${SERPER_API_KEY}',
|
||||
firecrawlApiKey: '${FIRECRAWL_API_KEY}',
|
||||
firecrawlApiUrl: '${FIRECRAWL_API_URL}',
|
||||
jinaApiKey: '${JINA_API_KEY}',
|
||||
cohereApiKey: '${COHERE_API_KEY}',
|
||||
safeSearch: true,
|
||||
};
|
||||
});
|
||||
|
||||
it('should return authenticated=true when all required categories are authenticated', async () => {
|
||||
// Mock successful authentication for all services
|
||||
mockLoadAuthValues.mockImplementation(({ authFields }) => {
|
||||
const result: Record<string, string> = {};
|
||||
authFields.forEach((field) => {
|
||||
result[field] =
|
||||
field === 'FIRECRAWL_API_URL' ? 'https://api.firecrawl.dev' : 'test-api-key';
|
||||
});
|
||||
return Promise.resolve(result);
|
||||
});
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
expect(result.authenticated).toBe(true);
|
||||
expect(result.authTypes).toHaveLength(3); // providers, scrapers, rerankers
|
||||
expect(result.authResult).toHaveProperty('serperApiKey', 'test-api-key');
|
||||
expect(result.authResult).toHaveProperty('firecrawlApiKey', 'test-api-key');
|
||||
|
||||
// The implementation only includes one reranker in the result
|
||||
// It will be either jina or cohere, but not both
|
||||
if (result.authResult.rerankerType === 'jina') {
|
||||
expect(result.authResult).toHaveProperty('jinaApiKey', 'test-api-key');
|
||||
} else {
|
||||
expect(result.authResult).toHaveProperty('cohereApiKey', 'test-api-key');
|
||||
}
|
||||
|
||||
expect(result.authResult).toHaveProperty('searchProvider', 'serper');
|
||||
expect(result.authResult).toHaveProperty('scraperType', 'firecrawl');
|
||||
expect(['jina', 'cohere']).toContain(result.authResult.rerankerType as string);
|
||||
});
|
||||
|
||||
it('should return authenticated=false when a required category is not authenticated', async () => {
|
||||
// Mock authentication failure for the providers category
|
||||
mockLoadAuthValues.mockImplementation(({ authFields }) => {
|
||||
const result: Record<string, string> = {};
|
||||
authFields.forEach((field) => {
|
||||
// Only provide values for scrapers and rerankers, not for providers
|
||||
if (field !== 'SERPER_API_KEY') {
|
||||
result[field] =
|
||||
field === 'FIRECRAWL_API_URL' ? 'https://api.firecrawl.dev' : 'test-api-key';
|
||||
}
|
||||
});
|
||||
return Promise.resolve(result);
|
||||
});
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
expect(result.authenticated).toBe(false);
|
||||
// We should still have authTypes for the categories we checked
|
||||
expect(result.authTypes.some(([category]) => category === 'providers')).toBe(true);
|
||||
});
|
||||
|
||||
it('should handle exceptions from loadAuthValues', async () => {
|
||||
// Mock loadAuthValues to throw an error
|
||||
mockLoadAuthValues.mockImplementation(() => {
|
||||
throw new Error('Authentication failed');
|
||||
});
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
throwError: false, // Don't throw errors
|
||||
});
|
||||
|
||||
expect(result.authenticated).toBe(false);
|
||||
});
|
||||
|
||||
it('should correctly identify user-provided vs system-defined auth', async () => {
|
||||
// Mock environment variables
|
||||
const originalEnv = process.env;
|
||||
process.env = {
|
||||
...originalEnv,
|
||||
SERPER_API_KEY: 'system-api-key',
|
||||
FIRECRAWL_API_KEY: 'system-api-key',
|
||||
JINA_API_KEY: 'system-api-key',
|
||||
};
|
||||
|
||||
// Mock loadAuthValues to return different values for some keys
|
||||
mockLoadAuthValues.mockImplementation(({ authFields }) => {
|
||||
const result: Record<string, string> = {};
|
||||
authFields.forEach((field) => {
|
||||
if (field === 'SERPER_API_KEY') {
|
||||
// This matches the system env var
|
||||
result[field] = 'system-api-key';
|
||||
} else if (field === 'FIRECRAWL_API_KEY') {
|
||||
// This is different from the system env var (user provided)
|
||||
result[field] = 'user-api-key';
|
||||
} else if (field === 'FIRECRAWL_API_URL') {
|
||||
result[field] = 'https://api.firecrawl.dev';
|
||||
} else if (field === 'JINA_API_KEY') {
|
||||
// This matches the system env var
|
||||
result[field] = 'system-api-key';
|
||||
} else {
|
||||
result[field] = 'test-api-key';
|
||||
}
|
||||
});
|
||||
return Promise.resolve(result);
|
||||
});
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
expect(result.authenticated).toBe(true);
|
||||
// Check for providers (system-defined) and scrapers (user-provided)
|
||||
const providersAuthType = result.authTypes.find(
|
||||
([category]) => category === 'providers',
|
||||
)?.[1];
|
||||
const scrapersAuthType = result.authTypes.find(([category]) => category === 'scrapers')?.[1];
|
||||
|
||||
expect(providersAuthType).toBe(AuthType.SYSTEM_DEFINED);
|
||||
expect(scrapersAuthType).toBe(AuthType.USER_PROVIDED);
|
||||
|
||||
// Restore original env
|
||||
process.env = originalEnv;
|
||||
});
|
||||
|
||||
it('should handle optional fields correctly', async () => {
|
||||
// Create a config without the optional firecrawlApiUrl
|
||||
const configWithoutOptional = { ...webSearchConfig } as Partial<TWebSearchConfig>;
|
||||
delete configWithoutOptional.firecrawlApiUrl;
|
||||
|
||||
mockLoadAuthValues.mockImplementation(({ authFields, optional }) => {
|
||||
const result: Record<string, string> = {};
|
||||
authFields.forEach((field) => {
|
||||
// Don't provide values for optional fields
|
||||
if (!optional?.has(field)) {
|
||||
result[field] = 'test-api-key';
|
||||
}
|
||||
});
|
||||
return Promise.resolve(result);
|
||||
});
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig: configWithoutOptional as TWebSearchConfig,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
expect(result.authenticated).toBe(true);
|
||||
expect(result.authResult).toHaveProperty('firecrawlApiKey', 'test-api-key');
|
||||
// Optional URL should not be in the result
|
||||
expect(result.authResult.firecrawlApiUrl).toBeUndefined();
|
||||
});
|
||||
|
||||
it('should preserve safeSearch setting from webSearchConfig', async () => {
|
||||
// Mock successful authentication
|
||||
mockLoadAuthValues.mockImplementation(({ authFields }) => {
|
||||
const result: Record<string, string> = {};
|
||||
authFields.forEach((field) => {
|
||||
result[field] = 'test-api-key';
|
||||
});
|
||||
return Promise.resolve(result);
|
||||
});
|
||||
|
||||
// Test with safeSearch: false
|
||||
const configWithSafeSearchOff = { ...webSearchConfig, safeSearch: false } as TWebSearchConfig;
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig: configWithSafeSearchOff,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
expect(result.authResult).toHaveProperty('safeSearch', false);
|
||||
});
|
||||
|
||||
it('should set the correct service types in authResult', async () => {
|
||||
// Mock successful authentication
|
||||
mockLoadAuthValues.mockImplementation(({ authFields }) => {
|
||||
const result: Record<string, string> = {};
|
||||
authFields.forEach((field) => {
|
||||
result[field] =
|
||||
field === 'FIRECRAWL_API_URL' ? 'https://api.firecrawl.dev' : 'test-api-key';
|
||||
});
|
||||
return Promise.resolve(result);
|
||||
});
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
// Check that the correct service types are set
|
||||
expect(result.authResult.searchProvider).toBe('serper' as SearchProviders);
|
||||
expect(result.authResult.scraperType).toBe('firecrawl' as ScraperTypes);
|
||||
// One of the rerankers should be set
|
||||
expect(['jina', 'cohere']).toContain(result.authResult.rerankerType as string);
|
||||
});
|
||||
|
||||
it('should check all services if none are specified', async () => {
|
||||
// Initialize a webSearchConfig without specific services
|
||||
const webSearchConfig: TCustomConfig['webSearch'] = {
|
||||
serperApiKey: '${SERPER_API_KEY}',
|
||||
firecrawlApiKey: '${FIRECRAWL_API_KEY}',
|
||||
firecrawlApiUrl: '${FIRECRAWL_API_URL}',
|
||||
jinaApiKey: '${JINA_API_KEY}',
|
||||
cohereApiKey: '${COHERE_API_KEY}',
|
||||
safeSearch: true,
|
||||
};
|
||||
|
||||
// Mock successful authentication
|
||||
mockLoadAuthValues.mockImplementation(({ authFields }) => {
|
||||
const result: Record<string, string> = {};
|
||||
authFields.forEach((field) => {
|
||||
result[field] =
|
||||
field === 'FIRECRAWL_API_URL' ? 'https://api.firecrawl.dev' : 'test-api-key';
|
||||
});
|
||||
return Promise.resolve(result);
|
||||
});
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
expect(result.authenticated).toBe(true);
|
||||
|
||||
// Should have checked all categories
|
||||
expect(result.authTypes).toHaveLength(3);
|
||||
|
||||
// Should have set values for all categories
|
||||
expect(result.authResult.searchProvider).toBeDefined();
|
||||
expect(result.authResult.scraperType).toBeDefined();
|
||||
expect(result.authResult.rerankerType).toBeDefined();
|
||||
});
|
||||
|
||||
it('should correctly identify authTypes based on specific configurations', async () => {
|
||||
// Set up environment variables for system-defined auth
|
||||
const originalEnv = process.env;
|
||||
process.env = {
|
||||
...originalEnv,
|
||||
SERPER_API_KEY: 'system-serper-key',
|
||||
FIRECRAWL_API_KEY: 'system-firecrawl-key',
|
||||
FIRECRAWL_API_URL: 'https://api.firecrawl.dev',
|
||||
JINA_API_KEY: 'system-jina-key',
|
||||
COHERE_API_KEY: 'system-cohere-key',
|
||||
};
|
||||
|
||||
// Initialize webSearchConfig with environment variable references
|
||||
const webSearchConfig: TCustomConfig['webSearch'] = {
|
||||
serperApiKey: '${SERPER_API_KEY}',
|
||||
firecrawlApiKey: '${FIRECRAWL_API_KEY}',
|
||||
firecrawlApiUrl: '${FIRECRAWL_API_URL}',
|
||||
jinaApiKey: '${JINA_API_KEY}',
|
||||
cohereApiKey: '${COHERE_API_KEY}',
|
||||
safeSearch: true,
|
||||
// Specify which services to use
|
||||
searchProvider: 'serper' as SearchProviders,
|
||||
scraperType: 'firecrawl' as ScraperTypes,
|
||||
rerankerType: 'jina' as RerankerTypes,
|
||||
};
|
||||
|
||||
// Mock loadAuthValues to return the actual values
|
||||
mockLoadAuthValues.mockImplementation(({ authFields }) => {
|
||||
const result: Record<string, string> = {};
|
||||
authFields.forEach((field) => {
|
||||
if (field === 'SERPER_API_KEY') {
|
||||
result[field] = 'system-serper-key';
|
||||
} else if (field === 'FIRECRAWL_API_KEY') {
|
||||
result[field] = 'system-firecrawl-key';
|
||||
} else if (field === 'FIRECRAWL_API_URL') {
|
||||
result[field] = 'https://api.firecrawl.dev';
|
||||
} else if (field === 'JINA_API_KEY') {
|
||||
result[field] = 'system-jina-key';
|
||||
} else if (field === 'COHERE_API_KEY') {
|
||||
result[field] = 'system-cohere-key';
|
||||
}
|
||||
});
|
||||
return Promise.resolve(result);
|
||||
});
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
// Verify that all required fields are present in the authResult
|
||||
expect(result.authResult).toHaveProperty('serperApiKey');
|
||||
expect(result.authResult).toHaveProperty('firecrawlApiKey');
|
||||
expect(result.authResult).toHaveProperty('firecrawlApiUrl');
|
||||
expect(result.authResult).toHaveProperty('jinaApiKey');
|
||||
expect(result.authResult).toHaveProperty('searchProvider');
|
||||
expect(result.authResult).toHaveProperty('scraperType');
|
||||
expect(result.authResult).toHaveProperty('rerankerType');
|
||||
|
||||
expect(result.authenticated).toBe(true);
|
||||
|
||||
// Verify authTypes for each category
|
||||
const providersAuthType = result.authTypes.find(
|
||||
([category]) => category === 'providers',
|
||||
)?.[1];
|
||||
const scrapersAuthType = result.authTypes.find(([category]) => category === 'scrapers')?.[1];
|
||||
const rerankersAuthType = result.authTypes.find(
|
||||
([category]) => category === 'rerankers',
|
||||
)?.[1];
|
||||
|
||||
// All should be system-defined since we're using environment variables
|
||||
expect(providersAuthType).toBe(AuthType.SYSTEM_DEFINED);
|
||||
expect(scrapersAuthType).toBe(AuthType.SYSTEM_DEFINED);
|
||||
expect(rerankersAuthType).toBe(AuthType.SYSTEM_DEFINED);
|
||||
|
||||
// Verify the authResult contains the correct values
|
||||
expect(result.authResult).toHaveProperty('serperApiKey', 'system-serper-key');
|
||||
expect(result.authResult).toHaveProperty('firecrawlApiKey', 'system-firecrawl-key');
|
||||
expect(result.authResult).toHaveProperty('firecrawlApiUrl', 'https://api.firecrawl.dev');
|
||||
expect(result.authResult).toHaveProperty('jinaApiKey', 'system-jina-key');
|
||||
expect(result.authResult).toHaveProperty('searchProvider', 'serper');
|
||||
expect(result.authResult).toHaveProperty('scraperType', 'firecrawl');
|
||||
expect(result.authResult).toHaveProperty('rerankerType', 'jina');
|
||||
|
||||
// Restore original env
|
||||
process.env = originalEnv;
|
||||
});
|
||||
|
||||
it('should handle custom variable names in environment variables', async () => {
|
||||
// Set up environment variables with custom names
|
||||
const originalEnv = process.env;
|
||||
process.env = {
|
||||
...originalEnv,
|
||||
CUSTOM_SERPER_KEY: 'custom-serper-key',
|
||||
CUSTOM_FIRECRAWL_KEY: 'custom-firecrawl-key',
|
||||
CUSTOM_FIRECRAWL_URL: 'https://custom.firecrawl.dev',
|
||||
CUSTOM_JINA_KEY: 'custom-jina-key',
|
||||
CUSTOM_COHERE_KEY: 'custom-cohere-key',
|
||||
};
|
||||
|
||||
// Initialize webSearchConfig with custom variable names
|
||||
const webSearchConfig: TCustomConfig['webSearch'] = {
|
||||
serperApiKey: '${CUSTOM_SERPER_KEY}',
|
||||
firecrawlApiKey: '${CUSTOM_FIRECRAWL_KEY}',
|
||||
firecrawlApiUrl: '${CUSTOM_FIRECRAWL_URL}',
|
||||
jinaApiKey: '${CUSTOM_JINA_KEY}',
|
||||
cohereApiKey: '${CUSTOM_COHERE_KEY}',
|
||||
safeSearch: true,
|
||||
// Specify which services to use
|
||||
searchProvider: 'serper' as SearchProviders,
|
||||
scraperType: 'firecrawl' as ScraperTypes,
|
||||
rerankerType: 'jina' as RerankerTypes, // Only Jina will be checked
|
||||
};
|
||||
|
||||
// Mock loadAuthValues to return the actual values
|
||||
mockLoadAuthValues.mockImplementation(({ authFields }) => {
|
||||
const result: Record<string, string> = {};
|
||||
authFields.forEach((field) => {
|
||||
if (field === 'CUSTOM_SERPER_KEY') {
|
||||
result[field] = 'custom-serper-key';
|
||||
} else if (field === 'CUSTOM_FIRECRAWL_KEY') {
|
||||
result[field] = 'custom-firecrawl-key';
|
||||
} else if (field === 'CUSTOM_FIRECRAWL_URL') {
|
||||
result[field] = 'https://custom.firecrawl.dev';
|
||||
} else if (field === 'CUSTOM_JINA_KEY') {
|
||||
result[field] = 'custom-jina-key';
|
||||
}
|
||||
// Note: CUSTOM_COHERE_KEY is not checked because we specified jina as rerankerType
|
||||
});
|
||||
return Promise.resolve(result);
|
||||
});
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
expect(result.authenticated).toBe(true);
|
||||
|
||||
// Verify the authResult contains the correct values from custom variables
|
||||
expect(result.authResult).toHaveProperty('serperApiKey', 'custom-serper-key');
|
||||
expect(result.authResult).toHaveProperty('firecrawlApiKey', 'custom-firecrawl-key');
|
||||
expect(result.authResult).toHaveProperty('firecrawlApiUrl', 'https://custom.firecrawl.dev');
|
||||
expect(result.authResult).toHaveProperty('jinaApiKey', 'custom-jina-key');
|
||||
// cohereApiKey should not be in the result since we specified jina as rerankerType
|
||||
expect(result.authResult).not.toHaveProperty('cohereApiKey');
|
||||
|
||||
// Verify the service types are set correctly
|
||||
expect(result.authResult).toHaveProperty('searchProvider', 'serper');
|
||||
expect(result.authResult).toHaveProperty('scraperType', 'firecrawl');
|
||||
expect(result.authResult).toHaveProperty('rerankerType', 'jina');
|
||||
|
||||
// Restore original env
|
||||
process.env = originalEnv;
|
||||
});
|
||||
|
||||
it('should always return authTypes array with exactly 3 categories', async () => {
|
||||
// Set up environment variables
|
||||
const originalEnv = process.env;
|
||||
process.env = {
|
||||
...originalEnv,
|
||||
SERPER_API_KEY: 'test-key',
|
||||
FIRECRAWL_API_KEY: 'test-key',
|
||||
FIRECRAWL_API_URL: 'https://api.firecrawl.dev',
|
||||
JINA_API_KEY: 'test-key',
|
||||
};
|
||||
|
||||
// Initialize webSearchConfig with environment variable references
|
||||
const webSearchConfig: TCustomConfig['webSearch'] = {
|
||||
serperApiKey: '${SERPER_API_KEY}',
|
||||
firecrawlApiKey: '${FIRECRAWL_API_KEY}',
|
||||
firecrawlApiUrl: '${FIRECRAWL_API_URL}',
|
||||
jinaApiKey: '${JINA_API_KEY}',
|
||||
cohereApiKey: '${COHERE_API_KEY}',
|
||||
safeSearch: true,
|
||||
};
|
||||
|
||||
// Mock loadAuthValues to return values
|
||||
mockLoadAuthValues.mockImplementation(({ authFields }) => {
|
||||
const result: Record<string, string> = {};
|
||||
authFields.forEach((field) => {
|
||||
result[field] = field === 'FIRECRAWL_API_URL' ? 'https://api.firecrawl.dev' : 'test-key';
|
||||
});
|
||||
return Promise.resolve(result);
|
||||
});
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
// Get the number of categories from webSearchAuth
|
||||
const expectedCategoryCount = Object.keys(webSearchAuth).length;
|
||||
|
||||
// Verify authTypes array structure
|
||||
expect(result.authTypes).toHaveLength(expectedCategoryCount);
|
||||
|
||||
// Verify each category exists exactly once
|
||||
const categories = result.authTypes.map(([category]) => category);
|
||||
Object.keys(webSearchAuth).forEach((category) => {
|
||||
expect(categories).toContain(category);
|
||||
});
|
||||
|
||||
// Verify no duplicate categories
|
||||
expect(new Set(categories).size).toBe(expectedCategoryCount);
|
||||
|
||||
// Verify each entry has the correct format [category, AuthType]
|
||||
result.authTypes.forEach(([category, authType]) => {
|
||||
expect(typeof category).toBe('string');
|
||||
expect([AuthType.SYSTEM_DEFINED, AuthType.USER_PROVIDED]).toContain(authType);
|
||||
});
|
||||
|
||||
// Restore original env
|
||||
process.env = originalEnv;
|
||||
});
|
||||
|
||||
it('should maintain authTypes array structure even when authentication fails', async () => {
|
||||
// Set up environment variables
|
||||
const originalEnv = process.env;
|
||||
process.env = {
|
||||
...originalEnv,
|
||||
SERPER_API_KEY: 'test-key',
|
||||
// Missing other keys to force authentication failure
|
||||
};
|
||||
|
||||
// Initialize webSearchConfig with environment variable references
|
||||
const webSearchConfig: TCustomConfig['webSearch'] = {
|
||||
serperApiKey: '${SERPER_API_KEY}',
|
||||
firecrawlApiKey: '${FIRECRAWL_API_KEY}',
|
||||
firecrawlApiUrl: '${FIRECRAWL_API_URL}',
|
||||
jinaApiKey: '${JINA_API_KEY}',
|
||||
cohereApiKey: '${COHERE_API_KEY}',
|
||||
safeSearch: true,
|
||||
};
|
||||
|
||||
// Mock loadAuthValues to return partial values
|
||||
mockLoadAuthValues.mockImplementation(({ authFields }) => {
|
||||
const result: Record<string, string> = {};
|
||||
authFields.forEach((field) => {
|
||||
if (field === 'SERPER_API_KEY') {
|
||||
result[field] = 'test-key';
|
||||
}
|
||||
// Other fields are intentionally missing
|
||||
});
|
||||
return Promise.resolve(result);
|
||||
});
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
// Get the number of categories from webSearchAuth
|
||||
const expectedCategoryCount = Object.keys(webSearchAuth).length;
|
||||
|
||||
// Verify authentication failed
|
||||
expect(result.authenticated).toBe(false);
|
||||
|
||||
// Verify authTypes array structure is maintained
|
||||
expect(result.authTypes).toHaveLength(expectedCategoryCount);
|
||||
|
||||
// Verify each category exists exactly once
|
||||
const categories = result.authTypes.map(([category]) => category);
|
||||
Object.keys(webSearchAuth).forEach((category) => {
|
||||
expect(categories).toContain(category);
|
||||
});
|
||||
|
||||
// Verify no duplicate categories
|
||||
expect(new Set(categories).size).toBe(expectedCategoryCount);
|
||||
|
||||
// Verify each entry has the correct format [category, AuthType]
|
||||
result.authTypes.forEach(([category, authType]) => {
|
||||
expect(typeof category).toBe('string');
|
||||
expect([AuthType.SYSTEM_DEFINED, AuthType.USER_PROVIDED]).toContain(authType);
|
||||
});
|
||||
|
||||
// Restore original env
|
||||
process.env = originalEnv;
|
||||
});
|
||||
});
|
||||
|
||||
describe('webSearchAuth', () => {
|
||||
it('should have the expected structure', () => {
|
||||
// Check that all expected categories exist
|
||||
expect(webSearchAuth).toHaveProperty('providers');
|
||||
expect(webSearchAuth).toHaveProperty('scrapers');
|
||||
expect(webSearchAuth).toHaveProperty('rerankers');
|
||||
|
||||
// Check providers
|
||||
expect(webSearchAuth.providers).toHaveProperty('serper');
|
||||
expect(webSearchAuth.providers.serper).toHaveProperty('serperApiKey', 1);
|
||||
|
||||
// Check scrapers
|
||||
expect(webSearchAuth.scrapers).toHaveProperty('firecrawl');
|
||||
expect(webSearchAuth.scrapers.firecrawl).toHaveProperty('firecrawlApiKey', 1);
|
||||
expect(webSearchAuth.scrapers.firecrawl).toHaveProperty('firecrawlApiUrl', 0);
|
||||
|
||||
// Check rerankers
|
||||
expect(webSearchAuth.rerankers).toHaveProperty('jina');
|
||||
expect(webSearchAuth.rerankers.jina).toHaveProperty('jinaApiKey', 1);
|
||||
expect(webSearchAuth.rerankers).toHaveProperty('cohere');
|
||||
expect(webSearchAuth.rerankers.cohere).toHaveProperty('cohereApiKey', 1);
|
||||
});
|
||||
|
||||
it('should mark required keys with value 1', () => {
|
||||
// All keys with value 1 are required
|
||||
expect(webSearchAuth.providers.serper.serperApiKey).toBe(1);
|
||||
expect(webSearchAuth.scrapers.firecrawl.firecrawlApiKey).toBe(1);
|
||||
expect(webSearchAuth.rerankers.jina.jinaApiKey).toBe(1);
|
||||
expect(webSearchAuth.rerankers.cohere.cohereApiKey).toBe(1);
|
||||
});
|
||||
|
||||
it('should mark optional keys with value 0', () => {
|
||||
// Keys with value 0 are optional
|
||||
expect(webSearchAuth.scrapers.firecrawl.firecrawlApiUrl).toBe(0);
|
||||
});
|
||||
});
|
||||
describe('loadWebSearchAuth with specific services', () => {
|
||||
// Common test variables
|
||||
const userId = 'test-user-id';
|
||||
let mockLoadAuthValues: jest.Mock;
|
||||
|
||||
beforeEach(() => {
|
||||
// Reset mocks before each test
|
||||
jest.clearAllMocks();
|
||||
|
||||
// Initialize the mock function
|
||||
mockLoadAuthValues = jest.fn();
|
||||
});
|
||||
|
||||
it('should only check the specified searchProvider', async () => {
|
||||
// Initialize a webSearchConfig with a specific searchProvider
|
||||
const webSearchConfig: TCustomConfig['webSearch'] = {
|
||||
serperApiKey: '${SERPER_API_KEY}',
|
||||
firecrawlApiKey: '${FIRECRAWL_API_KEY}',
|
||||
firecrawlApiUrl: '${FIRECRAWL_API_URL}',
|
||||
jinaApiKey: '${JINA_API_KEY}',
|
||||
cohereApiKey: '${COHERE_API_KEY}',
|
||||
safeSearch: true,
|
||||
searchProvider: 'serper' as SearchProviders,
|
||||
};
|
||||
|
||||
// Mock successful authentication
|
||||
mockLoadAuthValues.mockImplementation(({ authFields }) => {
|
||||
const result: Record<string, string> = {};
|
||||
authFields.forEach((field) => {
|
||||
result[field] =
|
||||
field === 'FIRECRAWL_API_URL' ? 'https://api.firecrawl.dev' : 'test-api-key';
|
||||
});
|
||||
return Promise.resolve(result);
|
||||
});
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
expect(result.authenticated).toBe(true);
|
||||
expect(result.authResult.searchProvider).toBe('serper');
|
||||
|
||||
// Verify that only SERPER_API_KEY was requested for the providers category
|
||||
const providerCalls = mockLoadAuthValues.mock.calls.filter((call) =>
|
||||
call[0].authFields.includes('SERPER_API_KEY'),
|
||||
);
|
||||
expect(providerCalls.length).toBe(1);
|
||||
});
|
||||
|
||||
it('should only check the specified scraperType', async () => {
|
||||
// Initialize a webSearchConfig with a specific scraperType
|
||||
const webSearchConfig: TCustomConfig['webSearch'] = {
|
||||
serperApiKey: '${SERPER_API_KEY}',
|
||||
firecrawlApiKey: '${FIRECRAWL_API_KEY}',
|
||||
firecrawlApiUrl: '${FIRECRAWL_API_URL}',
|
||||
jinaApiKey: '${JINA_API_KEY}',
|
||||
cohereApiKey: '${COHERE_API_KEY}',
|
||||
safeSearch: true,
|
||||
scraperType: 'firecrawl' as ScraperTypes,
|
||||
};
|
||||
|
||||
// Mock successful authentication
|
||||
mockLoadAuthValues.mockImplementation(({ authFields }) => {
|
||||
const result: Record<string, string> = {};
|
||||
authFields.forEach((field) => {
|
||||
result[field] =
|
||||
field === 'FIRECRAWL_API_URL' ? 'https://api.firecrawl.dev' : 'test-api-key';
|
||||
});
|
||||
return Promise.resolve(result);
|
||||
});
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
expect(result.authenticated).toBe(true);
|
||||
expect(result.authResult.scraperType).toBe('firecrawl');
|
||||
|
||||
// Verify that only FIRECRAWL_API_KEY and FIRECRAWL_API_URL were requested for the scrapers category
|
||||
const scraperCalls = mockLoadAuthValues.mock.calls.filter((call) =>
|
||||
call[0].authFields.includes('FIRECRAWL_API_KEY'),
|
||||
);
|
||||
expect(scraperCalls.length).toBe(1);
|
||||
});
|
||||
|
||||
it('should only check the specified rerankerType', async () => {
|
||||
// Initialize a webSearchConfig with a specific rerankerType
|
||||
const webSearchConfig: TCustomConfig['webSearch'] = {
|
||||
serperApiKey: '${SERPER_API_KEY}',
|
||||
firecrawlApiKey: '${FIRECRAWL_API_KEY}',
|
||||
firecrawlApiUrl: '${FIRECRAWL_API_URL}',
|
||||
jinaApiKey: '${JINA_API_KEY}',
|
||||
cohereApiKey: '${COHERE_API_KEY}',
|
||||
safeSearch: true,
|
||||
rerankerType: 'jina' as RerankerTypes,
|
||||
};
|
||||
|
||||
// Mock successful authentication
|
||||
mockLoadAuthValues.mockImplementation(({ authFields }) => {
|
||||
const result: Record<string, string> = {};
|
||||
authFields.forEach((field) => {
|
||||
result[field] =
|
||||
field === 'FIRECRAWL_API_URL' ? 'https://api.firecrawl.dev' : 'test-api-key';
|
||||
});
|
||||
return Promise.resolve(result);
|
||||
});
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
expect(result.authenticated).toBe(true);
|
||||
expect(result.authResult.rerankerType).toBe('jina');
|
||||
|
||||
// Verify that only JINA_API_KEY was requested for the rerankers category
|
||||
const rerankerCalls = mockLoadAuthValues.mock.calls.filter((call) =>
|
||||
call[0].authFields.includes('JINA_API_KEY'),
|
||||
);
|
||||
expect(rerankerCalls.length).toBe(1);
|
||||
|
||||
// Verify that COHERE_API_KEY was not requested
|
||||
const cohereCalls = mockLoadAuthValues.mock.calls.filter((call) =>
|
||||
call[0].authFields.includes('COHERE_API_KEY'),
|
||||
);
|
||||
expect(cohereCalls.length).toBe(0);
|
||||
});
|
||||
|
||||
it('should handle invalid specified service gracefully', async () => {
|
||||
// Initialize a webSearchConfig with an invalid searchProvider
|
||||
const webSearchConfig: TCustomConfig['webSearch'] = {
|
||||
serperApiKey: '${SERPER_API_KEY}',
|
||||
firecrawlApiKey: '${FIRECRAWL_API_KEY}',
|
||||
firecrawlApiUrl: '${FIRECRAWL_API_URL}',
|
||||
jinaApiKey: '${JINA_API_KEY}',
|
||||
cohereApiKey: '${COHERE_API_KEY}',
|
||||
safeSearch: true,
|
||||
searchProvider: 'invalid-provider' as SearchProviders,
|
||||
};
|
||||
|
||||
// Mock successful authentication
|
||||
mockLoadAuthValues.mockImplementation(({ authFields }) => {
|
||||
const result: Record<string, string> = {};
|
||||
authFields.forEach((field) => {
|
||||
result[field] =
|
||||
field === 'FIRECRAWL_API_URL' ? 'https://api.firecrawl.dev' : 'test-api-key';
|
||||
});
|
||||
return Promise.resolve(result);
|
||||
});
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
// Should fail because the specified provider doesn't exist
|
||||
expect(result.authenticated).toBe(false);
|
||||
});
|
||||
|
||||
it('should fail authentication when specified service is not authenticated but others are', async () => {
|
||||
// Initialize a webSearchConfig with a specific rerankerType (jina)
|
||||
const webSearchConfig: TCustomConfig['webSearch'] = {
|
||||
serperApiKey: '${SERPER_API_KEY}',
|
||||
firecrawlApiKey: '${FIRECRAWL_API_KEY}',
|
||||
firecrawlApiUrl: '${FIRECRAWL_API_URL}',
|
||||
jinaApiKey: '${JINA_API_KEY}',
|
||||
cohereApiKey: '${COHERE_API_KEY}',
|
||||
safeSearch: true,
|
||||
rerankerType: 'jina' as RerankerTypes,
|
||||
};
|
||||
|
||||
// Mock authentication where cohere is authenticated but jina is not
|
||||
mockLoadAuthValues.mockImplementation(({ authFields }) => {
|
||||
const result: Record<string, string> = {};
|
||||
authFields.forEach((field) => {
|
||||
// Authenticate all fields except JINA_API_KEY
|
||||
if (field !== 'JINA_API_KEY') {
|
||||
result[field] =
|
||||
field === 'FIRECRAWL_API_URL' ? 'https://api.firecrawl.dev' : 'test-api-key';
|
||||
}
|
||||
});
|
||||
return Promise.resolve(result);
|
||||
});
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
// Should fail because the specified reranker (jina) is not authenticated
|
||||
// even though another reranker (cohere) might be authenticated
|
||||
expect(result.authenticated).toBe(false);
|
||||
|
||||
// Verify that JINA_API_KEY was requested
|
||||
const jinaApiKeyCalls = mockLoadAuthValues.mock.calls.filter((call) =>
|
||||
call[0].authFields.includes('JINA_API_KEY'),
|
||||
);
|
||||
expect(jinaApiKeyCalls.length).toBe(1);
|
||||
|
||||
// Verify that COHERE_API_KEY was not requested since we specified jina
|
||||
const cohereApiKeyCalls = mockLoadAuthValues.mock.calls.filter((call) =>
|
||||
call[0].authFields.includes('COHERE_API_KEY'),
|
||||
);
|
||||
expect(cohereApiKeyCalls.length).toBe(0);
|
||||
});
|
||||
|
||||
it('should check all services if none are specified', async () => {
|
||||
// Initialize a webSearchConfig without specific services
|
||||
const webSearchConfig: TCustomConfig['webSearch'] = {
|
||||
serperApiKey: '${SERPER_API_KEY}',
|
||||
firecrawlApiKey: '${FIRECRAWL_API_KEY}',
|
||||
firecrawlApiUrl: '${FIRECRAWL_API_URL}',
|
||||
jinaApiKey: '${JINA_API_KEY}',
|
||||
cohereApiKey: '${COHERE_API_KEY}',
|
||||
safeSearch: true,
|
||||
};
|
||||
|
||||
// Mock successful authentication
|
||||
mockLoadAuthValues.mockImplementation(({ authFields }) => {
|
||||
const result: Record<string, string> = {};
|
||||
authFields.forEach((field) => {
|
||||
result[field] =
|
||||
field === 'FIRECRAWL_API_URL' ? 'https://api.firecrawl.dev' : 'test-api-key';
|
||||
});
|
||||
return Promise.resolve(result);
|
||||
});
|
||||
|
||||
const result = await loadWebSearchAuth({
|
||||
userId,
|
||||
webSearchConfig,
|
||||
loadAuthValues: mockLoadAuthValues,
|
||||
});
|
||||
|
||||
expect(result.authenticated).toBe(true);
|
||||
|
||||
// Should have checked all categories
|
||||
expect(result.authTypes).toHaveLength(3);
|
||||
|
||||
// Should have set values for all categories
|
||||
expect(result.authResult.searchProvider).toBeDefined();
|
||||
expect(result.authResult.scraperType).toBeDefined();
|
||||
expect(result.authResult.rerankerType).toBeDefined();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
@ -167,6 +167,7 @@ export enum AgentCapabilities {
|
|||
end_after_tools = 'end_after_tools',
|
||||
execute_code = 'execute_code',
|
||||
file_search = 'file_search',
|
||||
web_search = 'web_search',
|
||||
artifacts = 'artifacts',
|
||||
actions = 'actions',
|
||||
tools = 'tools',
|
||||
|
|
@ -245,11 +246,12 @@ export const agentsEndpointSChema = baseEndpointSchema.merge(
|
|||
.default([
|
||||
AgentCapabilities.execute_code,
|
||||
AgentCapabilities.file_search,
|
||||
AgentCapabilities.web_search,
|
||||
AgentCapabilities.artifacts,
|
||||
AgentCapabilities.actions,
|
||||
AgentCapabilities.tools,
|
||||
AgentCapabilities.ocr,
|
||||
AgentCapabilities.chain,
|
||||
AgentCapabilities.ocr,
|
||||
]),
|
||||
}),
|
||||
);
|
||||
|
|
@ -494,6 +496,7 @@ export const intefaceSchema = z
|
|||
agents: z.boolean().optional(),
|
||||
temporaryChat: z.boolean().optional(),
|
||||
runCode: z.boolean().optional(),
|
||||
webSearch: z.boolean().optional(),
|
||||
})
|
||||
.default({
|
||||
endpointsMenu: true,
|
||||
|
|
@ -507,6 +510,7 @@ export const intefaceSchema = z
|
|||
agents: true,
|
||||
temporaryChat: true,
|
||||
runCode: true,
|
||||
webSearch: true,
|
||||
});
|
||||
|
||||
export type TInterfaceConfig = z.infer<typeof intefaceSchema>;
|
||||
|
|
@ -567,6 +571,11 @@ export type TStartupConfig = {
|
|||
instanceProjectId: string;
|
||||
bundlerURL?: string;
|
||||
staticBundlerURL?: string;
|
||||
webSearch?: {
|
||||
searchProvider?: SearchProviders;
|
||||
scraperType?: ScraperTypes;
|
||||
rerankerType?: RerankerTypes;
|
||||
};
|
||||
};
|
||||
|
||||
export enum OCRStrategy {
|
||||
|
|
@ -574,10 +583,45 @@ export enum OCRStrategy {
|
|||
CUSTOM_OCR = 'custom_ocr',
|
||||
}
|
||||
|
||||
export enum SearchCategories {
|
||||
PROVIDERS = 'providers',
|
||||
SCRAPERS = 'scrapers',
|
||||
RERANKERS = 'rerankers',
|
||||
}
|
||||
|
||||
export enum SearchProviders {
|
||||
SERPER = 'serper',
|
||||
SEARXNG = 'searxng',
|
||||
}
|
||||
|
||||
export enum ScraperTypes {
|
||||
FIRECRAWL = 'firecrawl',
|
||||
SERPER = 'serper',
|
||||
}
|
||||
|
||||
export enum RerankerTypes {
|
||||
JINA = 'jina',
|
||||
COHERE = 'cohere',
|
||||
}
|
||||
|
||||
export const webSearchSchema = z.object({
|
||||
serperApiKey: z.string().optional().default('${SERPER_API_KEY}'),
|
||||
firecrawlApiKey: z.string().optional().default('${FIRECRAWL_API_KEY}'),
|
||||
firecrawlApiUrl: z.string().optional().default('${FIRECRAWL_API_URL}'),
|
||||
jinaApiKey: z.string().optional().default('${JINA_API_KEY}'),
|
||||
cohereApiKey: z.string().optional().default('${COHERE_API_KEY}'),
|
||||
searchProvider: z.nativeEnum(SearchProviders).optional(),
|
||||
scraperType: z.nativeEnum(ScraperTypes).optional(),
|
||||
rerankerType: z.nativeEnum(RerankerTypes).optional(),
|
||||
safeSearch: z.boolean().default(true),
|
||||
});
|
||||
|
||||
export type TWebSearchConfig = z.infer<typeof webSearchSchema>;
|
||||
|
||||
export const ocrSchema = z.object({
|
||||
mistralModel: z.string().optional(),
|
||||
apiKey: z.string().optional().default('OCR_API_KEY'),
|
||||
baseURL: z.string().optional().default('OCR_BASEURL'),
|
||||
apiKey: z.string().optional().default('${OCR_API_KEY}'),
|
||||
baseURL: z.string().optional().default('${OCR_BASEURL}'),
|
||||
strategy: z.nativeEnum(OCRStrategy).default(OCRStrategy.MISTRAL_OCR),
|
||||
});
|
||||
|
||||
|
|
@ -597,6 +641,7 @@ export const configSchema = z.object({
|
|||
version: z.string(),
|
||||
cache: z.boolean().default(true),
|
||||
ocr: ocrSchema.optional(),
|
||||
webSearch: webSearchSchema.optional(),
|
||||
secureImageLinks: z.boolean().optional(),
|
||||
imageOutputType: z.nativeEnum(EImageOutputType).default(EImageOutputType.PNG),
|
||||
includedTools: z.array(z.string()).optional(),
|
||||
|
|
@ -1336,6 +1381,8 @@ export enum LocalStorageKeys {
|
|||
LAST_MCP_ = 'LAST_MCP_',
|
||||
/** Last checked toggle for Code Interpreter API per conversation ID */
|
||||
LAST_CODE_TOGGLE_ = 'LAST_CODE_TOGGLE_',
|
||||
/** Last checked toggle for Web Search per conversation ID */
|
||||
LAST_WEB_SEARCH_TOGGLE_ = 'LAST_WEB_SEARCH_TOGGLE_',
|
||||
}
|
||||
|
||||
export enum ForkOptions {
|
||||
|
|
|
|||
|
|
@ -14,6 +14,8 @@ export * from './generate';
|
|||
export * from './models';
|
||||
/* mcp */
|
||||
export * from './mcp';
|
||||
/* web search */
|
||||
export * from './web';
|
||||
/* RBAC */
|
||||
export * from './permissions';
|
||||
export * from './roles';
|
||||
|
|
@ -25,6 +27,7 @@ export * from './types/files';
|
|||
export * from './types/mutations';
|
||||
export * from './types/queries';
|
||||
export * from './types/runs';
|
||||
export * from './types/web';
|
||||
/* query/mutation keys */
|
||||
export * from './keys';
|
||||
/* api call helpers */
|
||||
|
|
|
|||
|
|
@ -28,6 +28,10 @@ export enum PermissionTypes {
|
|||
* Type for using the "Run Code" LC Code Interpreter API feature
|
||||
*/
|
||||
RUN_CODE = 'RUN_CODE',
|
||||
/**
|
||||
* Type for using the "Web Search" feature
|
||||
*/
|
||||
WEB_SEARCH = 'WEB_SEARCH',
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -79,6 +83,11 @@ export const runCodePermissionsSchema = z.object({
|
|||
});
|
||||
export type TRunCodePermissions = z.infer<typeof runCodePermissionsSchema>;
|
||||
|
||||
export const webSearchPermissionsSchema = z.object({
|
||||
[Permissions.USE]: z.boolean().default(true),
|
||||
});
|
||||
export type TWebSearchPermissions = z.infer<typeof webSearchPermissionsSchema>;
|
||||
|
||||
// Define a single permissions schema that holds all permission types.
|
||||
export const permissionsSchema = z.object({
|
||||
[PermissionTypes.PROMPTS]: promptPermissionsSchema,
|
||||
|
|
@ -87,4 +96,5 @@ export const permissionsSchema = z.object({
|
|||
[PermissionTypes.MULTI_CONVO]: multiConvoPermissionsSchema,
|
||||
[PermissionTypes.TEMPORARY_CHAT]: temporaryChatPermissionsSchema,
|
||||
[PermissionTypes.RUN_CODE]: runCodePermissionsSchema,
|
||||
[PermissionTypes.WEB_SEARCH]: webSearchPermissionsSchema,
|
||||
});
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import {
|
|||
agentPermissionsSchema,
|
||||
promptPermissionsSchema,
|
||||
runCodePermissionsSchema,
|
||||
webSearchPermissionsSchema,
|
||||
bookmarkPermissionsSchema,
|
||||
multiConvoPermissionsSchema,
|
||||
temporaryChatPermissionsSchema,
|
||||
|
|
@ -62,6 +63,9 @@ const defaultRolesSchema = z.object({
|
|||
[PermissionTypes.RUN_CODE]: runCodePermissionsSchema.extend({
|
||||
[Permissions.USE]: z.boolean().default(true),
|
||||
}),
|
||||
[PermissionTypes.WEB_SEARCH]: webSearchPermissionsSchema.extend({
|
||||
[Permissions.USE]: z.boolean().default(true),
|
||||
}),
|
||||
}),
|
||||
}),
|
||||
[SystemRoles.USER]: roleSchema.extend({
|
||||
|
|
@ -96,6 +100,9 @@ export const roleDefaults = defaultRolesSchema.parse({
|
|||
[PermissionTypes.RUN_CODE]: {
|
||||
[Permissions.USE]: true,
|
||||
},
|
||||
[PermissionTypes.WEB_SEARCH]: {
|
||||
[Permissions.USE]: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
[SystemRoles.USER]: {
|
||||
|
|
@ -107,6 +114,7 @@ export const roleDefaults = defaultRolesSchema.parse({
|
|||
[PermissionTypes.MULTI_CONVO]: {},
|
||||
[PermissionTypes.TEMPORARY_CHAT]: {},
|
||||
[PermissionTypes.RUN_CODE]: {},
|
||||
[PermissionTypes.WEB_SEARCH]: {},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import { z } from 'zod';
|
||||
import { Tools } from './types/assistants';
|
||||
import type { TMessageContentParts, FunctionTool, FunctionToolCall } from './types/assistants';
|
||||
import type { SearchResultData } from './types/web';
|
||||
import type { TEphemeralAgent } from './types';
|
||||
import type { TFile } from './types/files';
|
||||
|
||||
|
|
@ -101,7 +102,8 @@ export const isEphemeralAgent = (
|
|||
}
|
||||
const hasMCPSelected = (ephemeralAgent?.mcp?.length ?? 0) > 0;
|
||||
const hasCodeSelected = (ephemeralAgent?.execute_code ?? false) === true;
|
||||
return hasMCPSelected || hasCodeSelected;
|
||||
const hasSearchSelected = (ephemeralAgent?.web_search ?? false) === true;
|
||||
return hasMCPSelected || hasCodeSelected || hasSearchSelected;
|
||||
};
|
||||
|
||||
export const isParamEndpoint = (
|
||||
|
|
@ -177,6 +179,7 @@ export const defaultAgentFormValues = {
|
|||
recursion_limit: undefined,
|
||||
[Tools.execute_code]: false,
|
||||
[Tools.file_search]: false,
|
||||
[Tools.web_search]: false,
|
||||
};
|
||||
|
||||
export const ImageVisionTool: FunctionTool = {
|
||||
|
|
@ -517,7 +520,13 @@ export const tMessageSchema = z.object({
|
|||
iconURL: z.string().nullable().optional(),
|
||||
});
|
||||
|
||||
export type TAttachmentMetadata = { messageId: string; toolCallId: string };
|
||||
export type TAttachmentMetadata = {
|
||||
type?: Tools;
|
||||
messageId: string;
|
||||
toolCallId: string;
|
||||
[Tools.web_search]?: SearchResultData;
|
||||
};
|
||||
|
||||
export type TAttachment =
|
||||
| (TFile & TAttachmentMetadata)
|
||||
| (Pick<TFile, 'filename' | 'filepath' | 'conversationId'> & {
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@ export type TEndpointOption = {
|
|||
|
||||
export type TEphemeralAgent = {
|
||||
mcp?: string[];
|
||||
web_search?: boolean;
|
||||
execute_code?: boolean;
|
||||
};
|
||||
|
||||
|
|
@ -79,7 +80,7 @@ export type EventSubmission = Omit<TSubmission, 'initialResponse'> & { initialRe
|
|||
export type TPluginAction = {
|
||||
pluginKey: string;
|
||||
action: 'install' | 'uninstall';
|
||||
auth?: unknown;
|
||||
auth?: Partial<Record<string, string>>;
|
||||
isEntityTool?: boolean;
|
||||
};
|
||||
|
||||
|
|
@ -89,7 +90,7 @@ export type TUpdateUserPlugins = {
|
|||
isEntityTool?: boolean;
|
||||
pluginKey: string;
|
||||
action: string;
|
||||
auth?: unknown;
|
||||
auth?: Partial<Record<string, string | null>>;
|
||||
};
|
||||
|
||||
// TODO `label` needs to be changed to the proper `TranslationKeys`
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ export enum Tools {
|
|||
execute_code = 'execute_code',
|
||||
code_interpreter = 'code_interpreter',
|
||||
file_search = 'file_search',
|
||||
web_search = 'web_search',
|
||||
retrieval = 'retrieval',
|
||||
function = 'function',
|
||||
}
|
||||
|
|
|
|||
|
|
@ -129,18 +129,18 @@ export type UpdateAgentVariables = {
|
|||
data: AgentUpdateParams;
|
||||
};
|
||||
|
||||
export type DuplicateVersionError = Error & {
|
||||
statusCode?: number;
|
||||
details?: {
|
||||
duplicateVersion?: any;
|
||||
versionIndex?: number
|
||||
}
|
||||
export type DuplicateVersionError = Error & {
|
||||
statusCode?: number;
|
||||
details?: {
|
||||
duplicateVersion?: unknown;
|
||||
versionIndex?: number;
|
||||
};
|
||||
};
|
||||
|
||||
export type UpdateAgentMutationOptions = MutationOptions<
|
||||
Agent,
|
||||
UpdateAgentVariables,
|
||||
unknown,
|
||||
Agent,
|
||||
UpdateAgentVariables,
|
||||
unknown,
|
||||
DuplicateVersionError
|
||||
>;
|
||||
|
||||
|
|
|
|||
|
|
@ -101,7 +101,11 @@ export type AllPromptGroupsResponse = t.TPromptGroup[];
|
|||
export type ConversationTagsResponse = s.TConversationTag[];
|
||||
|
||||
export type VerifyToolAuthParams = { toolId: string };
|
||||
export type VerifyToolAuthResponse = { authenticated: boolean; message?: string | s.AuthType };
|
||||
export type VerifyToolAuthResponse = {
|
||||
authenticated: boolean;
|
||||
message?: string | s.AuthType;
|
||||
authTypes?: [string, s.AuthType][];
|
||||
};
|
||||
|
||||
export type GetToolCallParams = { conversationId: string };
|
||||
export type ToolCallResults = a.ToolCallResult[];
|
||||
|
|
|
|||
593
packages/data-provider/src/types/web.ts
Normal file
593
packages/data-provider/src/types/web.ts
Normal file
|
|
@ -0,0 +1,593 @@
|
|||
import type { Logger as WinstonLogger } from 'winston';
|
||||
import type { RunnableConfig } from '@langchain/core/runnables';
|
||||
|
||||
export type SearchRefType = 'search' | 'image' | 'news' | 'video' | 'ref';
|
||||
|
||||
export enum DATE_RANGE {
|
||||
PAST_HOUR = 'h',
|
||||
PAST_24_HOURS = 'd',
|
||||
PAST_WEEK = 'w',
|
||||
PAST_MONTH = 'm',
|
||||
PAST_YEAR = 'y',
|
||||
}
|
||||
|
||||
export type SearchProvider = 'serper' | 'searxng';
|
||||
export type RerankerType = 'infinity' | 'jina' | 'cohere' | 'none';
|
||||
|
||||
export interface Highlight {
|
||||
score: number;
|
||||
text: string;
|
||||
references?: UsedReferences;
|
||||
}
|
||||
|
||||
export type ProcessedSource = {
|
||||
content?: string;
|
||||
attribution?: string;
|
||||
references?: References;
|
||||
highlights?: Highlight[];
|
||||
processed?: boolean;
|
||||
};
|
||||
|
||||
export type ProcessedOrganic = OrganicResult & ProcessedSource;
|
||||
export type ProcessedTopStory = TopStoryResult & ProcessedSource;
|
||||
export type ValidSource = ProcessedOrganic | ProcessedTopStory;
|
||||
|
||||
export type ResultReference = {
|
||||
link: string;
|
||||
type: 'link' | 'image' | 'video';
|
||||
title?: string;
|
||||
attribution?: string;
|
||||
};
|
||||
export interface SearchResultData {
|
||||
turn?: number;
|
||||
organic?: ProcessedOrganic[];
|
||||
topStories?: ProcessedTopStory[];
|
||||
images?: ImageResult[];
|
||||
videos?: VideoResult[];
|
||||
places?: PlaceResult[];
|
||||
news?: NewsResult[];
|
||||
shopping?: ShoppingResult[];
|
||||
knowledgeGraph?: KnowledgeGraphResult;
|
||||
answerBox?: AnswerBoxResult;
|
||||
peopleAlsoAsk?: PeopleAlsoAskResult[];
|
||||
relatedSearches?: Array<{ query: string }>;
|
||||
references?: ResultReference[];
|
||||
error?: string;
|
||||
}
|
||||
|
||||
export interface SearchResult {
|
||||
data?: SearchResultData;
|
||||
error?: string;
|
||||
success: boolean;
|
||||
}
|
||||
|
||||
export interface Source {
|
||||
link: string;
|
||||
html?: string;
|
||||
title?: string;
|
||||
snippet?: string;
|
||||
date?: string;
|
||||
}
|
||||
|
||||
export interface SearchConfig {
|
||||
searchProvider?: SearchProvider;
|
||||
serperApiKey?: string;
|
||||
searxngInstanceUrl?: string;
|
||||
searxngApiKey?: string;
|
||||
}
|
||||
|
||||
export type References = {
|
||||
links: MediaReference[];
|
||||
images: MediaReference[];
|
||||
videos: MediaReference[];
|
||||
};
|
||||
export interface ScrapeResult {
|
||||
url: string;
|
||||
error?: boolean;
|
||||
content: string;
|
||||
attribution?: string;
|
||||
references?: References;
|
||||
highlights?: Highlight[];
|
||||
}
|
||||
|
||||
export interface ProcessSourcesConfig {
|
||||
topResults?: number;
|
||||
strategies?: string[];
|
||||
filterContent?: boolean;
|
||||
reranker?: unknown;
|
||||
logger?: Logger;
|
||||
}
|
||||
|
||||
export interface FirecrawlConfig {
|
||||
firecrawlApiKey?: string;
|
||||
firecrawlApiUrl?: string;
|
||||
firecrawlFormats?: string[];
|
||||
}
|
||||
|
||||
export interface ScraperContentResult {
|
||||
content: string;
|
||||
}
|
||||
|
||||
export interface ScraperExtractionResult {
|
||||
no_extraction: ScraperContentResult;
|
||||
}
|
||||
|
||||
export interface JinaRerankerResult {
|
||||
index: number;
|
||||
relevance_score: number;
|
||||
document?: string | { text: string };
|
||||
}
|
||||
|
||||
export interface JinaRerankerResponse {
|
||||
model: string;
|
||||
usage: {
|
||||
total_tokens: number;
|
||||
};
|
||||
results: JinaRerankerResult[];
|
||||
}
|
||||
|
||||
export interface CohereRerankerResult {
|
||||
index: number;
|
||||
relevance_score: number;
|
||||
}
|
||||
|
||||
export interface CohereRerankerResponse {
|
||||
results: CohereRerankerResult[];
|
||||
id: string;
|
||||
meta: {
|
||||
api_version: {
|
||||
version: string;
|
||||
is_experimental: boolean;
|
||||
};
|
||||
billed_units: {
|
||||
search_units: number;
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
export type SafeSearchLevel = 0 | 1 | 2;
|
||||
|
||||
export type Logger = WinstonLogger;
|
||||
export interface SearchToolConfig extends SearchConfig, ProcessSourcesConfig, FirecrawlConfig {
|
||||
logger?: Logger;
|
||||
safeSearch?: SafeSearchLevel;
|
||||
jinaApiKey?: string;
|
||||
cohereApiKey?: string;
|
||||
rerankerType?: RerankerType;
|
||||
onSearchResults?: (results: SearchResult, runnableConfig?: RunnableConfig) => void;
|
||||
onGetHighlights?: (link: string) => void;
|
||||
}
|
||||
export interface MediaReference {
|
||||
originalUrl: string;
|
||||
title?: string;
|
||||
text?: string;
|
||||
}
|
||||
|
||||
export type UsedReferences = {
|
||||
type: 'link' | 'image' | 'video';
|
||||
originalIndex: number;
|
||||
reference: MediaReference;
|
||||
}[];
|
||||
|
||||
/** Firecrawl */
|
||||
|
||||
export interface FirecrawlScrapeOptions {
|
||||
formats?: string[];
|
||||
includeTags?: string[];
|
||||
excludeTags?: string[];
|
||||
headers?: Record<string, string>;
|
||||
waitFor?: number;
|
||||
timeout?: number;
|
||||
}
|
||||
|
||||
export interface ScrapeMetadata {
|
||||
// Core source information
|
||||
sourceURL?: string;
|
||||
url?: string;
|
||||
scrapeId?: string;
|
||||
statusCode?: number;
|
||||
// Basic metadata
|
||||
title?: string;
|
||||
description?: string;
|
||||
language?: string;
|
||||
favicon?: string;
|
||||
viewport?: string;
|
||||
robots?: string;
|
||||
'theme-color'?: string;
|
||||
// Open Graph metadata
|
||||
'og:url'?: string;
|
||||
'og:title'?: string;
|
||||
'og:description'?: string;
|
||||
'og:type'?: string;
|
||||
'og:image'?: string;
|
||||
'og:image:width'?: string;
|
||||
'og:image:height'?: string;
|
||||
'og:site_name'?: string;
|
||||
ogUrl?: string;
|
||||
ogTitle?: string;
|
||||
ogDescription?: string;
|
||||
ogImage?: string;
|
||||
ogSiteName?: string;
|
||||
// Article metadata
|
||||
'article:author'?: string;
|
||||
'article:published_time'?: string;
|
||||
'article:modified_time'?: string;
|
||||
'article:section'?: string;
|
||||
'article:tag'?: string;
|
||||
'article:publisher'?: string;
|
||||
publishedTime?: string;
|
||||
modifiedTime?: string;
|
||||
// Twitter metadata
|
||||
'twitter:site'?: string | boolean | number | null;
|
||||
'twitter:creator'?: string;
|
||||
'twitter:card'?: string;
|
||||
'twitter:image'?: string;
|
||||
'twitter:dnt'?: string;
|
||||
'twitter:app:name:iphone'?: string;
|
||||
'twitter:app:id:iphone'?: string;
|
||||
'twitter:app:url:iphone'?: string;
|
||||
'twitter:app:name:ipad'?: string;
|
||||
'twitter:app:id:ipad'?: string;
|
||||
'twitter:app:url:ipad'?: string;
|
||||
'twitter:app:name:googleplay'?: string;
|
||||
'twitter:app:id:googleplay'?: string;
|
||||
'twitter:app:url:googleplay'?: string;
|
||||
// Facebook metadata
|
||||
'fb:app_id'?: string;
|
||||
// App links
|
||||
'al:ios:url'?: string;
|
||||
'al:ios:app_name'?: string;
|
||||
'al:ios:app_store_id'?: string;
|
||||
// Allow for additional properties that might be present
|
||||
[key: string]: string | number | boolean | null | undefined;
|
||||
}
|
||||
|
||||
/** Response envelope returned by the Firecrawl scrape API */
export interface FirecrawlScrapeResponse {
  /** Whether the scrape request succeeded */
  success: boolean;
  /** Scraped content in the requested formats */
  data?: {
    markdown?: string;
    html?: string;
    rawHtml?: string;
    screenshot?: string;
    links?: string[];
    metadata?: ScrapeMetadata;
  };
  /** Error message, if any */
  error?: string;
}
|
||||
|
||||
/** Configuration for the Firecrawl scraper client */
export interface FirecrawlScraperConfig {
  apiKey?: string;
  /** Base URL of the Firecrawl API (e.g. a self-hosted instance) — TODO confirm */
  apiUrl?: string;
  /** Output formats to request (see FirecrawlScrapeResponse.data keys) */
  formats?: string[];
  /** Request timeout — presumably milliseconds; verify against caller */
  timeout?: number;
  logger?: Logger;
}
|
||||
|
||||
/** Parameters for fetching search result sources */
export type GetSourcesParams = {
  /** The search query string */
  query: string;
  /** Date range filter for results */
  date?: DATE_RANGE;
  /** Country code for localized results */
  country?: string;
  /** Number of results to request */
  numResults?: number;
  safeSearch?: SearchToolConfig['safeSearch'];
  images?: boolean;
  videos?: boolean;
  news?: boolean;
  /** Search vertical to query */
  type?: 'search' | 'images' | 'videos' | 'news';
};
|
||||
|
||||
/** Serper API */
|
||||
/** A single video entry in Serper search results */
export interface VideoResult {
  title?: string;
  link?: string;
  snippet?: string;
  imageUrl?: string;
  duration?: string;
  source?: string;
  channel?: string;
  date?: string;
  /** Rank of the result within its list */
  position?: number;
}
|
||||
|
||||
/** A single place/maps entry in Serper search results */
export interface PlaceResult {
  /** Rank of the result within its list */
  position?: number;
  name?: string;
  address?: string;
  latitude?: number;
  longitude?: number;
  rating?: number;
  /** Number of ratings behind the average `rating` */
  ratingCount?: number;
  category?: string;
  identifier?: string;
}
|
||||
|
||||
/** A single news entry in Serper search results */
export interface NewsResult {
  title?: string;
  link?: string;
  snippet?: string;
  date?: string;
  /** Publisher / outlet name */
  source?: string;
  imageUrl?: string;
  /** Rank of the result within its list */
  position?: number;
}
|
||||
|
||||
/** A single shopping entry in Serper search results */
export interface ShoppingResult {
  title?: string;
  /** Merchant / seller name */
  source?: string;
  link?: string;
  /** Price as a display string (currency formatting included) */
  price?: string;
  delivery?: string;
  imageUrl?: string;
  rating?: number;
  ratingCount?: number;
  offers?: string;
  productId?: string;
  /** Rank of the result within its list */
  position?: number;
}
|
||||
|
||||
/** A single Google Scholar entry in Serper search results */
export interface ScholarResult {
  title?: string;
  link?: string;
  /** Authors / journal / publisher line as rendered by Scholar */
  publicationInfo?: string;
  snippet?: string;
  year?: number;
  /** Citation count */
  citedBy?: number;
}
|
||||
|
||||
/** A single image entry in Serper search results */
export interface ImageResult {
  title?: string;
  /** URL of the full-size image */
  imageUrl?: string;
  imageWidth?: number;
  imageHeight?: number;
  /** URL of the thumbnail rendition */
  thumbnailUrl?: string;
  thumbnailWidth?: number;
  thumbnailHeight?: number;
  source?: string;
  domain?: string;
  /** URL of the page hosting the image */
  link?: string;
  googleUrl?: string;
  /** Rank of the result within its list */
  position?: number;
}
|
||||
|
||||
/** Full request payload accepted by the Serper API */
export interface SerperSearchPayload extends SerperSearchInput {
  /**
   * Search type/vertical
   * Options: "search" (web), "images", "news", "places", "videos"
   */
  type?: 'search' | 'images' | 'news' | 'places' | 'videos';

  /**
   * Starting index for search results pagination (used instead of page)
   */
  start?: number;

  /**
   * Filtering for safe search
   * Options: "off", "moderate", "active"
   */
  safe?: 'off' | 'moderate' | 'active';
}
|
||||
|
||||
/** Minimal search parameters echoed back in Serper responses */
export type SerperSearchParameters = Pick<SerperSearchPayload, 'q' | 'type'> & {
  engine: 'google';
};
|
||||
|
||||
/** A standard (organic) web result from Serper */
export interface OrganicResult {
  /** Rank of the result within its list */
  position?: number;
  title?: string;
  /** Result URL — the only required field */
  link: string;
  snippet?: string;
  date?: string;
  /** Sub-links shown beneath the main result */
  sitelinks?: Array<{
    title: string;
    link: string;
  }>;
}
|
||||
|
||||
/** A "Top stories" entry from Serper */
export interface TopStoryResult {
  title?: string;
  /** Story URL — the only required field */
  link: string;
  source?: string;
  date?: string;
  imageUrl?: string;
}
|
||||
/** Knowledge graph panel data from Serper */
export interface KnowledgeGraphResult {
  title?: string;
  /** Entity type label (e.g. person, organization) — TODO confirm values */
  type?: string;
  imageUrl?: string;
  description?: string;
  /** Name of the source the description was taken from */
  descriptionSource?: string;
  /** Link to the source of the description */
  descriptionLink?: string;
  /** Key/value attribute pairs shown in the panel */
  attributes?: Record<string, string>;
  website?: string;
}
|
||||
|
||||
/** Featured "answer box" data from Serper */
export interface AnswerBoxResult {
  title?: string;
  snippet?: string;
  /** Portions of the snippet highlighted by the search engine */
  snippetHighlighted?: string[];
  link?: string;
  date?: string;
}
|
||||
|
||||
/** A "People also ask" entry from Serper */
export interface PeopleAlsoAskResult {
  question?: string;
  snippet?: string;
  title?: string;
  link?: string;
}
|
||||
|
||||
export type RelatedSearches = Array<{ query: string }>;
|
||||
|
||||
/** Base query parameters shared by all Serper search requests */
export interface SerperSearchInput {
  /**
   * The search query string
   */
  q: string;

  /**
   * Country code for localized results
   * Examples: "us", "uk", "ca", "de", etc.
   */
  gl?: string;

  /**
   * Interface language
   * Examples: "en", "fr", "de", etc.
   */
  hl?: string;

  /**
   * Number of results to return (up to 100)
   */
  num?: number;

  /**
   * Specific location for contextual results
   * Example: "New York, NY"
   */
  location?: string;

  /**
   * Search autocorrection setting
   */
  autocorrect?: boolean;

  /** Page number of results (alternative to `start` in SerperSearchPayload) */
  page?: number;

  /**
   * Date range for search results
   * Options: "h" (past hour), "d" (past 24 hours), "w" (past week),
   * "m" (past month), "y" (past year)
   * `qdr:${DATE_RANGE}`
   */
  tbs?: string;
}
|
||||
|
||||
/** Full result payload returned by the Serper API */
export type SerperResultData = {
  /** Echo of the request parameters */
  searchParameters: SerperSearchPayload;
  organic?: OrganicResult[];
  topStories?: TopStoryResult[];
  images?: ImageResult[];
  videos?: VideoResult[];
  places?: PlaceResult[];
  news?: NewsResult[];
  shopping?: ShoppingResult[];
  peopleAlsoAsk?: PeopleAlsoAskResult[];
  relatedSearches?: RelatedSearches;
  knowledgeGraph?: KnowledgeGraphResult;
  answerBox?: AnswerBoxResult;
  /** API credits reported for this request */
  credits?: number;
};
|
||||
|
||||
/** SearXNG */
|
||||
|
||||
/** Query parameters accepted by a SearXNG instance's search endpoint */
export interface SearxNGSearchPayload {
  /**
   * The search query string
   * Supports syntax specific to different search engines
   * Example: "site:github.com SearXNG"
   */
  q: string;

  /**
   * Comma-separated list of search categories
   * Example: "general,images,news"
   */
  categories?: string;

  /**
   * Comma-separated list of search engines to use
   * Example: "google,bing,duckduckgo"
   */
  engines?: string;

  /**
   * Code of the language for search results
   * Example: "en", "fr", "de", "es"
   */
  language?: string;

  /**
   * Search page number
   * Default: 1
   */
  pageno?: number;

  /**
   * Time range filter for search results
   * Options: "day", "month", "year"
   */
  time_range?: 'day' | 'month' | 'year';

  /**
   * Output format of results
   * Options: "json", "csv", "rss"
   */
  format?: 'json' | 'csv' | 'rss';

  /**
   * Open search results on new tab
   * Options: `0` (off), `1` (on)
   */
  results_on_new_tab?: 0 | 1;

  /**
   * Proxy image results through SearxNG
   * Options: true, false
   */
  image_proxy?: boolean;

  /**
   * Service for autocomplete suggestions
   * Options: "google", "dbpedia", "duckduckgo", "mwmbl",
   * "startpage", "wikipedia", "stract", "swisscows", "qwant"
   */
  autocomplete?: string;

  /**
   * Safe search filtering level
   * Options: "0" (off), "1" (moderate), "2" (strict)
   */
  safesearch?: 0 | 1 | 2;

  /**
   * Theme to use for results page
   * Default: "simple" (other themes may be available per instance)
   */
  theme?: string;

  /**
   * List of enabled plugins
   * Default: "Hash_plugin,Self_Information,Tracker_URL_remover,Ahmia_blacklist"
   */
  enabled_plugins?: string;

  /**
   * List of disabled plugins
   */
  disabled_plugins?: string;

  /**
   * List of enabled engines
   */
  enabled_engines?: string;

  /**
   * List of disabled engines
   */
  disabled_engines?: string;
}
|
||||
|
||||
/** A single result entry returned by a SearXNG instance */
export interface SearXNGResult {
  title?: string;
  url?: string;
  /** Result snippet / body text */
  content?: string;
  publishedDate?: string;
  /** Image source URL (snake_case mirrors the SearXNG JSON field) */
  img_src?: string;
}
|
||||
|
||||
/** Inputs for turning a raw search result into processed sources */
export type ProcessSourcesFields = {
  result: SearchResult;
  /** Number of elements to process — presumably a result cap; verify against caller */
  numElements: number;
  query: string;
  /** Whether this is a news search */
  news: boolean;
  proMode: boolean;
  onGetHighlights: SearchToolConfig['onGetHighlights'];
};
|
||||
|
|
@ -1,5 +1,15 @@
|
|||
export const envVarRegex = /^\${(.+)}$/;
|
||||
|
||||
/** Extracts the environment variable name from a template literal string */
|
||||
export function extractVariableName(value: string): string | null {
|
||||
if (!value) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const match = value.trim().match(envVarRegex);
|
||||
return match ? match[1] : null;
|
||||
}
|
||||
|
||||
/** Extracts the value of an environment variable from a string. */
|
||||
export function extractEnvVariable(value: string) {
|
||||
if (!value) {
|
||||
|
|
|
|||
270
packages/data-provider/src/web.ts
Normal file
270
packages/data-provider/src/web.ts
Normal file
|
|
@ -0,0 +1,270 @@
|
|||
import type {
|
||||
ScraperTypes,
|
||||
RerankerTypes,
|
||||
TCustomConfig,
|
||||
SearchProviders,
|
||||
TWebSearchConfig,
|
||||
} from './config';
|
||||
import { extractVariableName } from './utils';
|
||||
import { SearchCategories } from './config';
|
||||
import { AuthType } from './schemas';
|
||||
|
||||
export function loadWebSearchConfig(
|
||||
config: TCustomConfig['webSearch'],
|
||||
): TCustomConfig['webSearch'] {
|
||||
const serperApiKey = config?.serperApiKey ?? '${SERPER_API_KEY}';
|
||||
const firecrawlApiKey = config?.firecrawlApiKey ?? '${FIRECRAWL_API_KEY}';
|
||||
const firecrawlApiUrl = config?.firecrawlApiUrl ?? '${FIRECRAWL_API_URL}';
|
||||
const jinaApiKey = config?.jinaApiKey ?? '${JINA_API_KEY}';
|
||||
const cohereApiKey = config?.cohereApiKey ?? '${COHERE_API_KEY}';
|
||||
const safeSearch = config?.safeSearch ?? true;
|
||||
|
||||
return {
|
||||
...config,
|
||||
safeSearch,
|
||||
jinaApiKey,
|
||||
cohereApiKey,
|
||||
serperApiKey,
|
||||
firecrawlApiKey,
|
||||
firecrawlApiUrl,
|
||||
};
|
||||
}
|
||||
|
||||
/** Credential field names recognized by the web search configuration */
export type TWebSearchKeys =
  | 'serperApiKey'
  | 'firecrawlApiKey'
  | 'firecrawlApiUrl'
  | 'jinaApiKey'
  | 'cohereApiKey';

/** Web search service categories: providers, scrapers, and rerankers */
export type TWebSearchCategories =
  | SearchCategories.PROVIDERS
  | SearchCategories.SCRAPERS
  | SearchCategories.RERANKERS;
|
||||
|
||||
export const webSearchAuth = {
|
||||
providers: {
|
||||
serper: {
|
||||
serperApiKey: 1 as const,
|
||||
},
|
||||
},
|
||||
scrapers: {
|
||||
firecrawl: {
|
||||
firecrawlApiKey: 1 as const,
|
||||
/** Optional (0) */
|
||||
firecrawlApiUrl: 0 as const,
|
||||
},
|
||||
},
|
||||
rerankers: {
|
||||
jina: { jinaApiKey: 1 as const },
|
||||
cohere: { cohereApiKey: 1 as const },
|
||||
},
|
||||
};
|
||||
|
||||
/**
|
||||
* Extracts all API keys from the webSearchAuth configuration object
|
||||
*/
|
||||
export const webSearchKeys: TWebSearchKeys[] = [];
|
||||
|
||||
// Iterate through each category (providers, scrapers, rerankers)
|
||||
for (const category of Object.keys(webSearchAuth)) {
|
||||
const categoryObj = webSearchAuth[category as TWebSearchCategories];
|
||||
|
||||
// Iterate through each service within the category
|
||||
for (const service of Object.keys(categoryObj)) {
|
||||
const serviceObj = categoryObj[service as keyof typeof categoryObj];
|
||||
|
||||
// Extract the API keys from the service
|
||||
for (const key of Object.keys(serviceObj)) {
|
||||
webSearchKeys.push(key as TWebSearchKeys);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export function extractWebSearchEnvVars({
|
||||
keys,
|
||||
config,
|
||||
}: {
|
||||
keys: TWebSearchKeys[];
|
||||
config: TCustomConfig['webSearch'] | undefined;
|
||||
}): string[] {
|
||||
if (!config) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const authFields: string[] = [];
|
||||
const relevantKeys = keys.filter((k) => k in config);
|
||||
|
||||
for (const key of relevantKeys) {
|
||||
const value = config[key];
|
||||
if (typeof value === 'string') {
|
||||
const varName = extractVariableName(value);
|
||||
if (varName) {
|
||||
authFields.push(varName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return authFields;
|
||||
}
|
||||
|
||||
/**
|
||||
* Type for web search authentication result
|
||||
*/
|
||||
export interface WebSearchAuthResult {
|
||||
/** Whether all required categories have at least one authenticated service */
|
||||
authenticated: boolean;
|
||||
/** Authentication type (user_provided or system_defined) by category */
|
||||
authTypes: [TWebSearchCategories, AuthType][];
|
||||
/** Original authentication values mapped to their respective keys */
|
||||
authResult: Partial<TWebSearchConfig>;
|
||||
}
|
||||
|
||||
/**
 * Loads and verifies web search authentication values.
 *
 * For each category (providers, scrapers, rerankers) it tries services in
 * order — or only the service pinned in the config — until one has all of
 * its required credentials resolvable via `loadAuthValues`. The first
 * passing service is recorded in `authResult` along with its credential
 * values.
 *
 * @param params - Authentication parameters
 * @param params.userId - User whose stored credentials may be loaded
 * @param params.webSearchConfig - The `webSearch` config section
 * @param params.loadAuthValues - Resolver mapping env-var field names to values
 * @param params.throwError - Forwarded to `loadAuthValues` (default `true`)
 * @returns Authentication result
 */
export async function loadWebSearchAuth({
  userId,
  webSearchConfig,
  loadAuthValues,
  throwError = true,
}: {
  userId: string;
  webSearchConfig: TCustomConfig['webSearch'];
  loadAuthValues: (params: {
    userId: string;
    authFields: string[];
    optional?: Set<string>;
    throwError?: boolean;
  }) => Promise<Record<string, string>>;
  throwError?: boolean;
}): Promise<WebSearchAuthResult> {
  let authenticated = true;
  const authResult: Partial<TWebSearchConfig> = {};

  /** Type-safe iterator for the category-service combinations.
   * Returns `[categoryAuthenticated, isUserProvided]`. */
  async function checkAuth<C extends TWebSearchCategories>(
    category: C,
  ): Promise<[boolean, boolean]> {
    type ServiceType = keyof (typeof webSearchAuth)[C];
    let isUserProvided = false;

    // Check if a specific service is specified in the config
    let specificService: ServiceType | undefined;
    if (category === SearchCategories.PROVIDERS && webSearchConfig?.searchProvider) {
      specificService = webSearchConfig.searchProvider as unknown as ServiceType;
    } else if (category === SearchCategories.SCRAPERS && webSearchConfig?.scraperType) {
      specificService = webSearchConfig.scraperType as unknown as ServiceType;
    } else if (category === SearchCategories.RERANKERS && webSearchConfig?.rerankerType) {
      specificService = webSearchConfig.rerankerType as unknown as ServiceType;
    }

    // If a specific service is specified, only check that one
    const services = specificService
      ? [specificService]
      : (Object.keys(webSearchAuth[category]) as ServiceType[]);

    for (const service of services) {
      // Skip if the service doesn't exist in the webSearchAuth config
      if (!webSearchAuth[category][service]) {
        continue;
      }

      const serviceConfig = webSearchAuth[category][service];

      // Split keys into required (1) and optional (0)
      const requiredKeys: TWebSearchKeys[] = [];
      const optionalKeys: TWebSearchKeys[] = [];

      for (const key in serviceConfig) {
        const typedKey = key as TWebSearchKeys;
        if (serviceConfig[typedKey as keyof typeof serviceConfig] === 1) {
          requiredKeys.push(typedKey);
        } else if (serviceConfig[typedKey as keyof typeof serviceConfig] === 0) {
          optionalKeys.push(typedKey);
        }
      }

      if (requiredKeys.length === 0) continue;

      const requiredAuthFields = extractWebSearchEnvVars({
        keys: requiredKeys,
        config: webSearchConfig,
      });
      const optionalAuthFields = extractWebSearchEnvVars({
        keys: optionalKeys,
        config: webSearchConfig,
      });
      // Every required key must resolve to an env-var name or this service fails
      if (requiredAuthFields.length !== requiredKeys.length) continue;

      // NOTE(review): allKeys[j] is assumed to pair with allAuthFields[j], but
      // if an optional key holds a non-${VAR} literal, optionalAuthFields is
      // shorter than optionalKeys and the pairing drifts — TODO confirm intended.
      const allKeys = [...requiredKeys, ...optionalKeys];
      const allAuthFields = [...requiredAuthFields, ...optionalAuthFields];
      const optionalSet = new Set(optionalAuthFields);

      try {
        const authValues = await loadAuthValues({
          userId,
          authFields: allAuthFields,
          optional: optionalSet,
          throwError,
        });

        let allFieldsAuthenticated = true;
        for (let j = 0; j < allAuthFields.length; j++) {
          const field = allAuthFields[j];
          const value = authValues[field];
          const originalKey = allKeys[j];
          if (originalKey) authResult[originalKey] = value;
          // A missing required value disqualifies this service
          if (!optionalSet.has(field) && !value) {
            allFieldsAuthenticated = false;
            break;
          }
          // Value differing from process.env implies it came from user storage
          if (!isUserProvided && process.env[field] !== value) {
            isUserProvided = true;
          }
        }

        if (!allFieldsAuthenticated) {
          continue;
        }
        // Record which concrete service satisfied this category
        if (category === SearchCategories.PROVIDERS) {
          authResult.searchProvider = service as SearchProviders;
        } else if (category === SearchCategories.SCRAPERS) {
          authResult.scraperType = service as ScraperTypes;
        } else if (category === SearchCategories.RERANKERS) {
          authResult.rerankerType = service as RerankerTypes;
        }
        return [true, isUserProvided];
      } catch {
        // loadAuthValues threw (e.g. missing credential) — try the next service
        continue;
      }
    }
    return [false, isUserProvided];
  }

  const categories = [
    SearchCategories.PROVIDERS,
    SearchCategories.SCRAPERS,
    SearchCategories.RERANKERS,
  ] as const;
  const authTypes: [TWebSearchCategories, AuthType][] = [];
  for (const category of categories) {
    const [isCategoryAuthenticated, isUserProvided] = await checkAuth(category);
    if (!isCategoryAuthenticated) {
      // Unauthenticated categories are reported as user_provided (user must supply keys)
      authenticated = false;
      authTypes.push([category, AuthType.USER_PROVIDED]);
      continue;
    }
    authTypes.push([category, isUserProvided ? AuthType.USER_PROVIDED : AuthType.SYSTEM_DEFINED]);
  }

  authResult.safeSearch = webSearchConfig?.safeSearch ?? true;

  return {
    authTypes,
    authResult,
    authenticated,
  };
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue