Mirror of https://github.com/danny-avila/LibreChat.git (synced 2026-01-10 20:48:54 +01:00)
🔍 feat: Add Serper as Scraper Provider and Firecrawl Version Support (#9984)
* 🔧 chore: Update @librechat/agents to v2.4.84 in package.json and package-lock.json
* feat: Add Serper as a new scraperProvider for Web Search and add firecrawlVersion support
* fix: Update TWebSearchKeys and ensure unique API key extraction
* chore: Add build:packages script to streamline package builds
Parent: 857c054a9a
Commit: 31a283a4fe
14 changed files with 247 additions and 47 deletions
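Taken together, the hunks below add two knobs to the webSearch config: a scraperProvider selector (replacing scraperType) and a firecrawlVersion string. A minimal sketch of a config object using both, assuming the enums and TCustomConfig are imported from librechat-data-provider as the new spec file does; the values are illustrative, not taken from the commit:

import { SearchProviders, ScraperProviders } from 'librechat-data-provider';
import type { TCustomConfig } from 'librechat-data-provider';

// Illustrative only: field names follow the updated webSearchSchema shown below;
// the '${...}' placeholders are the env-var convention the schema defaults to.
const webSearch: TCustomConfig['webSearch'] = {
  searchProvider: SearchProviders.SERPER,
  scraperProvider: ScraperProviders.FIRECRAWL, // replaces the old `scraperType`
  serperApiKey: '${SERPER_API_KEY}',
  firecrawlApiKey: '${FIRECRAWL_API_KEY}',
  firecrawlVersion: 'v2', // new field; 'v2' mirrors the value used in the new spec file
};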
@@ -80,7 +80,7 @@
 "@azure/storage-blob": "^12.27.0",
 "@keyv/redis": "^4.3.3",
 "@langchain/core": "^0.3.62",
-"@librechat/agents": "^2.4.83",
+"@librechat/agents": "^2.4.84",
 "@librechat/data-schemas": "*",
 "@modelcontextprotocol/sdk": "^1.17.1",
 "axios": "^1.12.1",
@@ -1,11 +1,11 @@
 import { webSearchAuth } from '@librechat/data-schemas';
 import { SafeSearchTypes, AuthType } from 'librechat-data-provider';
 import type {
-ScraperTypes,
+ScraperProviders,
-TWebSearchConfig,
-SearchProviders,
 TCustomConfig,
 RerankerTypes,
+SearchProviders,
+TWebSearchConfig,
 } from 'librechat-data-provider';
 import { loadWebSearchAuth, extractWebSearchEnvVars } from './web';

@@ -119,7 +119,7 @@ describe('web.ts', () => {
 }

 expect(result.authResult).toHaveProperty('searchProvider', 'serper');
-expect(result.authResult).toHaveProperty('scraperType', 'firecrawl');
+expect(result.authResult).toHaveProperty('scraperProvider', 'firecrawl');
 expect(['jina', 'cohere']).toContain(result.authResult.rerankerType as string);
 });

@@ -288,7 +288,7 @@ describe('web.ts', () => {

 // Check that the correct service types are set
 expect(result.authResult.searchProvider).toBe('serper' as SearchProviders);
-expect(result.authResult.scraperType).toBe('firecrawl' as ScraperTypes);
+expect(result.authResult.scraperProvider).toBe('firecrawl' as ScraperProviders);
 // One of the rerankers should be set
 expect(['jina', 'cohere']).toContain(result.authResult.rerankerType as string);
 });
@@ -330,7 +330,7 @@ describe('web.ts', () => {

 // Should have set values for all categories
 expect(result.authResult.searchProvider).toBeDefined();
-expect(result.authResult.scraperType).toBeDefined();
+expect(result.authResult.scraperProvider).toBeDefined();
 expect(result.authResult.rerankerType).toBeDefined();
 });

@@ -359,7 +359,7 @@ describe('web.ts', () => {
 safeSearch: SafeSearchTypes.MODERATE,
 // Specify which services to use
 searchProvider: 'serper' as SearchProviders,
-scraperType: 'firecrawl' as ScraperTypes,
+scraperProvider: 'firecrawl' as ScraperProviders,
 rerankerType: 'jina' as RerankerTypes,
 };

@@ -394,7 +394,7 @@ describe('web.ts', () => {
 expect(result.authResult).toHaveProperty('firecrawlApiUrl');
 expect(result.authResult).toHaveProperty('jinaApiKey');
 expect(result.authResult).toHaveProperty('searchProvider');
-expect(result.authResult).toHaveProperty('scraperType');
+expect(result.authResult).toHaveProperty('scraperProvider');
 expect(result.authResult).toHaveProperty('rerankerType');

 expect(result.authenticated).toBe(true);
@@ -419,7 +419,7 @@ describe('web.ts', () => {
 expect(result.authResult).toHaveProperty('firecrawlApiUrl', 'https://api.firecrawl.dev');
 expect(result.authResult).toHaveProperty('jinaApiKey', 'system-jina-key');
 expect(result.authResult).toHaveProperty('searchProvider', 'serper');
-expect(result.authResult).toHaveProperty('scraperType', 'firecrawl');
+expect(result.authResult).toHaveProperty('scraperProvider', 'firecrawl');
 expect(result.authResult).toHaveProperty('rerankerType', 'jina');

 // Restore original env
@@ -452,7 +452,7 @@ describe('web.ts', () => {
 safeSearch: SafeSearchTypes.MODERATE,
 // Specify which services to use
 searchProvider: 'serper' as SearchProviders,
-scraperType: 'firecrawl' as ScraperTypes,
+scraperProvider: 'firecrawl' as ScraperProviders,
 rerankerType: 'jina' as RerankerTypes, // Only Jina will be checked
 };

@@ -492,7 +492,7 @@ describe('web.ts', () => {

 // Verify the service types are set correctly
 expect(result.authResult).toHaveProperty('searchProvider', 'serper');
-expect(result.authResult).toHaveProperty('scraperType', 'firecrawl');
+expect(result.authResult).toHaveProperty('scraperProvider', 'firecrawl');
 expect(result.authResult).toHaveProperty('rerankerType', 'jina');

 // Restore original env
@@ -722,8 +722,8 @@ describe('web.ts', () => {
 expect(providerCalls.length).toBe(1);
 });

-it('should only check the specified scraperType', async () => {
-// Initialize a webSearchConfig with a specific scraperType
+it('should only check the specified scraperProvider', async () => {
+// Initialize a webSearchConfig with a specific scraperProvider
 const webSearchConfig: TCustomConfig['webSearch'] = {
 serperApiKey: '${SERPER_API_KEY}',
 searxngInstanceUrl: '${SEARXNG_INSTANCE_URL}',
@@ -734,7 +734,7 @@ describe('web.ts', () => {
 jinaApiUrl: '${JINA_API_URL}',
 cohereApiKey: '${COHERE_API_KEY}',
 safeSearch: SafeSearchTypes.MODERATE,
-scraperType: 'firecrawl' as ScraperTypes,
+scraperProvider: 'firecrawl' as ScraperProviders,
 };

 // Mock successful authentication
@@ -754,7 +754,7 @@ describe('web.ts', () => {
 });

 expect(result.authenticated).toBe(true);
-expect(result.authResult.scraperType).toBe('firecrawl');
+expect(result.authResult.scraperProvider).toBe('firecrawl');

 // Verify that only FIRECRAWL_API_KEY and FIRECRAWL_API_URL were requested for the scrapers category
 const scraperCalls = mockLoadAuthValues.mock.calls.filter((call) =>
@@ -933,7 +933,7 @@ describe('web.ts', () => {

 // Should have set values for all categories
 expect(result.authResult.searchProvider).toBeDefined();
-expect(result.authResult.scraperType).toBeDefined();
+expect(result.authResult.scraperProvider).toBeDefined();
 expect(result.authResult.rerankerType).toBeDefined();
 });

@@ -4,14 +4,14 @@ import {
 SearchCategories,
 extractVariableName,
 } from 'librechat-data-provider';
-import { webSearchAuth } from '@librechat/data-schemas';
 import type {
-ScraperTypes,
 RerankerTypes,
 TCustomConfig,
 SearchProviders,
+ScraperProviders,
 TWebSearchConfig,
 } from 'librechat-data-provider';
+import { webSearchAuth } from '@librechat/data-schemas';
 import type { TWebSearchKeys, TWebSearchCategories } from '@librechat/data-schemas';

 export function extractWebSearchEnvVars({
@@ -88,8 +88,8 @@ export async function loadWebSearchAuth({
 let specificService: ServiceType | undefined;
 if (category === SearchCategories.PROVIDERS && webSearchConfig?.searchProvider) {
 specificService = webSearchConfig.searchProvider as unknown as ServiceType;
-} else if (category === SearchCategories.SCRAPERS && webSearchConfig?.scraperType) {
-specificService = webSearchConfig.scraperType as unknown as ServiceType;
+} else if (category === SearchCategories.SCRAPERS && webSearchConfig?.scraperProvider) {
+specificService = webSearchConfig.scraperProvider as unknown as ServiceType;
 } else if (category === SearchCategories.RERANKERS && webSearchConfig?.rerankerType) {
 specificService = webSearchConfig.rerankerType as unknown as ServiceType;
 }
@@ -165,7 +165,7 @@ export async function loadWebSearchAuth({
 if (category === SearchCategories.PROVIDERS) {
 authResult.searchProvider = service as SearchProviders;
 } else if (category === SearchCategories.SCRAPERS) {
-authResult.scraperType = service as ScraperTypes;
+authResult.scraperProvider = service as ScraperProviders;
 } else if (category === SearchCategories.RERANKERS) {
 authResult.rerankerType = service as RerankerTypes;
 }
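The two loadWebSearchAuth hunks above change which field records the winning scraper service: the scrapers category now writes authResult.scraperProvider instead of authResult.scraperType. A minimal standalone sketch of that mapping with simplified types; SketchAuthResult and recordService are hypothetical names for illustration, not part of the codebase:

type SketchAuthResult = {
  searchProvider?: string;
  scraperProvider?: string;
  rerankerType?: string;
};

function recordService(
  category: 'providers' | 'scrapers' | 'rerankers',
  service: string,
  authResult: SketchAuthResult,
): void {
  if (category === 'providers') {
    authResult.searchProvider = service;
  } else if (category === 'scrapers') {
    authResult.scraperProvider = service; // was `scraperType` before this commit
  } else {
    authResult.rerankerType = service;
  }
}

const authResult: SketchAuthResult = {};
recordService('scrapers', 'serper', authResult);
console.log(authResult.scraperProvider); // 'serper'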
@@ -650,7 +650,7 @@ export type TStartupConfig = {
 minPasswordLength?: number;
 webSearch?: {
 searchProvider?: SearchProviders;
-scraperType?: ScraperTypes;
+scraperProvider?: ScraperProviders;
 rerankerType?: RerankerTypes;
 };
 mcpServers?: Record<
@@ -689,7 +689,7 @@ export enum SearchProviders {
 SEARXNG = 'searxng',
 }

-export enum ScraperTypes {
+export enum ScraperProviders {
 FIRECRAWL = 'firecrawl',
 SERPER = 'serper',
 }
@@ -711,11 +711,12 @@ export const webSearchSchema = z.object({
 searxngApiKey: z.string().optional().default('${SEARXNG_API_KEY}'),
 firecrawlApiKey: z.string().optional().default('${FIRECRAWL_API_KEY}'),
 firecrawlApiUrl: z.string().optional().default('${FIRECRAWL_API_URL}'),
+firecrawlVersion: z.string().optional().default('${FIRECRAWL_VERSION}'),
 jinaApiKey: z.string().optional().default('${JINA_API_KEY}'),
 jinaApiUrl: z.string().optional().default('${JINA_API_URL}'),
 cohereApiKey: z.string().optional().default('${COHERE_API_KEY}'),
 searchProvider: z.nativeEnum(SearchProviders).optional(),
-scraperType: z.nativeEnum(ScraperTypes).optional(),
+scraperProvider: z.nativeEnum(ScraperProviders).optional(),
 rerankerType: z.nativeEnum(RerankerTypes).optional(),
 scraperTimeout: z.number().optional(),
 safeSearch: z.nativeEnum(SafeSearchTypes).default(SafeSearchTypes.MODERATE),
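A short sketch of how the updated schema behaves, assuming webSearchSchema and ScraperProviders are exported from the librechat-data-provider package as the surrounding code suggests: firecrawlVersion falls back to its '${FIRECRAWL_VERSION}' placeholder, and scraperProvider only accepts values of the renamed enum.

import { webSearchSchema, ScraperProviders } from 'librechat-data-provider';

// Defaults: every unset string field resolves to its '${...}' placeholder.
const defaults = webSearchSchema.parse({});
console.log(defaults.firecrawlVersion); // '${FIRECRAWL_VERSION}'

// scraperProvider is optional but, when present, must be a ScraperProviders value.
const withSerper = webSearchSchema.parse({ scraperProvider: ScraperProviders.SERPER });
console.log(withSerper.scraperProvider); // 'serper'

// An unknown scraper name fails validation.
console.log(webSearchSchema.safeParse({ scraperProvider: 'playwright' }).success); // false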
packages/data-schemas/src/app/web.spec.ts (new file, 173 lines)
@@ -0,0 +1,173 @@
import { SafeSearchTypes, SearchProviders, ScraperProviders } from 'librechat-data-provider';
import type { TCustomConfig } from 'librechat-data-provider';
import { loadWebSearchConfig } from './web';

describe('loadWebSearchConfig', () => {
  describe('firecrawlVersion', () => {
    it('should use provided firecrawlVersion when specified', () => {
      const config: TCustomConfig['webSearch'] = {
        firecrawlVersion: 'v2',
      };

      const result = loadWebSearchConfig(config);

      expect(result?.firecrawlVersion).toBe('v2');
    });

    it('should default to ${FIRECRAWL_VERSION} when not provided', () => {
      const config: TCustomConfig['webSearch'] = {};

      const result = loadWebSearchConfig(config);

      expect(result?.firecrawlVersion).toBe('${FIRECRAWL_VERSION}');
    });

    it('should default to ${FIRECRAWL_VERSION} when config is undefined', () => {
      const result = loadWebSearchConfig(undefined);

      expect(result?.firecrawlVersion).toBe('${FIRECRAWL_VERSION}');
    });

    it('should preserve custom firecrawlVersion value', () => {
      const config: TCustomConfig['webSearch'] = {
        firecrawlVersion: 'v1',
      };

      const result = loadWebSearchConfig(config);

      expect(result?.firecrawlVersion).toBe('v1');
    });
  });

  describe('all config fields', () => {
    it('should apply defaults for all fields when config is empty', () => {
      const config: TCustomConfig['webSearch'] = {};

      const result = loadWebSearchConfig(config);

      expect(result).toEqual({
        serperApiKey: '${SERPER_API_KEY}',
        searxngInstanceUrl: '${SEARXNG_INSTANCE_URL}',
        searxngApiKey: '${SEARXNG_API_KEY}',
        firecrawlApiKey: '${FIRECRAWL_API_KEY}',
        firecrawlApiUrl: '${FIRECRAWL_API_URL}',
        firecrawlVersion: '${FIRECRAWL_VERSION}',
        jinaApiKey: '${JINA_API_KEY}',
        jinaApiUrl: '${JINA_API_URL}',
        cohereApiKey: '${COHERE_API_KEY}',
        safeSearch: SafeSearchTypes.MODERATE,
      });
    });

    it('should preserve provided config values and merge with defaults', () => {
      const config: TCustomConfig['webSearch'] = {
        serperApiKey: 'custom-serper-key',
        firecrawlApiKey: 'custom-firecrawl-key',
        firecrawlVersion: 'v2',
        safeSearch: SafeSearchTypes.STRICT,
      };

      const result = loadWebSearchConfig(config);

      expect(result?.serperApiKey).toBe('custom-serper-key');
      expect(result?.firecrawlApiKey).toBe('custom-firecrawl-key');
      expect(result?.firecrawlVersion).toBe('v2');
      expect(result?.safeSearch).toBe(SafeSearchTypes.STRICT);
      expect(result?.jinaApiKey).toBe('${JINA_API_KEY}');
    });

    it('should preserve additional fields from input config', () => {
      const config: TCustomConfig['webSearch'] = {
        serperApiKey: 'test-key',
        scraperProvider: ScraperProviders.SERPER,
        searchProvider: SearchProviders.SERPER,
      };

      const result = loadWebSearchConfig(config);

      expect(result?.scraperProvider).toBe('serper');
      expect(result?.searchProvider).toBe('serper');
      expect(result?.serperApiKey).toBe('test-key');
    });
  });

  describe('safeSearch', () => {
    it('should default to MODERATE when not provided', () => {
      const config: TCustomConfig['webSearch'] = {};

      const result = loadWebSearchConfig(config);

      expect(result?.safeSearch).toBe(SafeSearchTypes.MODERATE);
    });

    it('should preserve OFF value', () => {
      const config: TCustomConfig['webSearch'] = {
        safeSearch: SafeSearchTypes.OFF,
      };

      const result = loadWebSearchConfig(config);

      expect(result?.safeSearch).toBe(SafeSearchTypes.OFF);
    });

    it('should preserve STRICT value', () => {
      const config: TCustomConfig['webSearch'] = {
        safeSearch: SafeSearchTypes.STRICT,
      };

      const result = loadWebSearchConfig(config);

      expect(result?.safeSearch).toBe(SafeSearchTypes.STRICT);
    });
  });

  describe('API keys', () => {
    it('should apply default placeholders for all API keys', () => {
      const result = loadWebSearchConfig({});

      expect(result?.serperApiKey).toBe('${SERPER_API_KEY}');
      expect(result?.searxngApiKey).toBe('${SEARXNG_API_KEY}');
      expect(result?.firecrawlApiKey).toBe('${FIRECRAWL_API_KEY}');
      expect(result?.jinaApiKey).toBe('${JINA_API_KEY}');
      expect(result?.cohereApiKey).toBe('${COHERE_API_KEY}');
    });

    it('should preserve custom API keys', () => {
      const config: TCustomConfig['webSearch'] = {
        serperApiKey: 'actual-serper-key',
        jinaApiKey: 'actual-jina-key',
        cohereApiKey: 'actual-cohere-key',
      };

      const result = loadWebSearchConfig(config);

      expect(result?.serperApiKey).toBe('actual-serper-key');
      expect(result?.jinaApiKey).toBe('actual-jina-key');
      expect(result?.cohereApiKey).toBe('actual-cohere-key');
    });
  });

  describe('URLs', () => {
    it('should apply default placeholders for URLs', () => {
      const result = loadWebSearchConfig({});

      expect(result?.searxngInstanceUrl).toBe('${SEARXNG_INSTANCE_URL}');
      expect(result?.firecrawlApiUrl).toBe('${FIRECRAWL_API_URL}');
      expect(result?.jinaApiUrl).toBe('${JINA_API_URL}');
    });

    it('should preserve custom URLs', () => {
      const config: TCustomConfig['webSearch'] = {
        searxngInstanceUrl: 'https://custom-searxng.com',
        firecrawlApiUrl: 'https://custom-firecrawl.com',
        jinaApiUrl: 'https://custom-jina.com',
      };

      const result = loadWebSearchConfig(config);

      expect(result?.searxngInstanceUrl).toBe('https://custom-searxng.com');
      expect(result?.firecrawlApiUrl).toBe('https://custom-firecrawl.com');
      expect(result?.jinaApiUrl).toBe('https://custom-jina.com');
    });
  });
});
@@ -18,6 +18,10 @@ export const webSearchAuth = {
 firecrawlApiKey: 1 as const,
 /** Optional (0) */
 firecrawlApiUrl: 0 as const,
+firecrawlVersion: 0 as const,
 },
+serper: {
+serperApiKey: 1 as const,
+},
 },
 rerankers: {
@@ -31,10 +35,10 @@ export function getWebSearchKeys(): TWebSearchKeys[] {
 };

 /**
- * Extracts all API keys from the webSearchAuth configuration object
+ * Extracts all unique API keys from the webSearchAuth configuration object
 */
 export function getWebSearchKeys(): TWebSearchKeys[] {
-const keys: TWebSearchKeys[] = [];
+const keysSet = new Set<TWebSearchKeys>();

 // Iterate through each category (providers, scrapers, rerankers)
 for (const category of Object.keys(webSearchAuth)) {
@@ -44,14 +48,14 @@ export function getWebSearchKeys(): TWebSearchKeys[] {
 for (const service of Object.keys(categoryObj)) {
 const serviceObj = categoryObj[service as keyof typeof categoryObj];

-// Extract the API keys from the service
+// Extract the API keys from the service and add to set for deduplication
 for (const key of Object.keys(serviceObj)) {
-keys.push(key as TWebSearchKeys);
+keysSet.add(key as TWebSearchKeys);
 }
 }
 }

-return keys;
+return Array.from(keysSet);
 }

 export const webSearchKeys: TWebSearchKeys[] = getWebSearchKeys();
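The Set-based rewrite matters because this commit registers Serper under scrapers as well, so the same key name (serperApiKey) can now appear in more than one category. A minimal sketch of the deduplication using an illustrative stand-in object, not the real webSearchAuth; the assumption that the providers category also lists Serper with a required serperApiKey is marked in the comments:

// `sketchAuth` is an illustrative stand-in for webSearchAuth (assumption: the
// providers category also lists Serper with a required serperApiKey).
const sketchAuth = {
  providers: { serper: { serperApiKey: 1 } },
  scrapers: {
    firecrawl: { firecrawlApiKey: 1, firecrawlApiUrl: 0, firecrawlVersion: 0 },
    serper: { serperApiKey: 1 },
  },
} as const;

const keysSet = new Set<string>();
for (const category of Object.values(sketchAuth)) {
  for (const service of Object.values(category)) {
    for (const key of Object.keys(service)) {
      keysSet.add(key);
    }
  }
}

// Without the Set, 'serperApiKey' would be listed twice.
console.log(Array.from(keysSet));
// ['serperApiKey', 'firecrawlApiKey', 'firecrawlApiUrl', 'firecrawlVersion']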
@@ -64,6 +68,7 @@ export function loadWebSearchConfig(
 const searxngApiKey = config?.searxngApiKey ?? '${SEARXNG_API_KEY}';
 const firecrawlApiKey = config?.firecrawlApiKey ?? '${FIRECRAWL_API_KEY}';
 const firecrawlApiUrl = config?.firecrawlApiUrl ?? '${FIRECRAWL_API_URL}';
+const firecrawlVersion = config?.firecrawlVersion ?? '${FIRECRAWL_VERSION}';
 const jinaApiKey = config?.jinaApiKey ?? '${JINA_API_KEY}';
 const jinaApiUrl = config?.jinaApiUrl ?? '${JINA_API_URL}';
 const cohereApiKey = config?.cohereApiKey ?? '${COHERE_API_KEY}';
@@ -76,9 +81,10 @@ export function loadWebSearchConfig(
 jinaApiUrl,
 cohereApiKey,
 serperApiKey,
-searxngInstanceUrl,
 searxngApiKey,
 firecrawlApiKey,
 firecrawlApiUrl,
+firecrawlVersion,
+searxngInstanceUrl,
 };
 }
@@ -6,6 +6,7 @@ export type TWebSearchKeys =
 | 'searxngApiKey'
 | 'firecrawlApiKey'
 | 'firecrawlApiUrl'
+| 'firecrawlVersion'
 | 'jinaApiKey'
 | 'jinaApiUrl'
 | 'cohereApiKey';
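With 'firecrawlVersion' added to the key union, maps keyed by TWebSearchKeys can reference it without a cast. A small sketch; the envVarFor lookup table is a hypothetical example, not code from the repository:

import type { TWebSearchKeys } from '@librechat/data-schemas';

// Hypothetical lookup table; only keys shown in the union above are used.
const envVarFor: Partial<Record<TWebSearchKeys, string>> = {
  firecrawlApiKey: 'FIRECRAWL_API_KEY',
  firecrawlApiUrl: 'FIRECRAWL_API_URL',
  firecrawlVersion: 'FIRECRAWL_VERSION', // newly valid as of this commit
};

console.log(envVarFor.firecrawlVersion); // 'FIRECRAWL_VERSION'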