🔍 feat: Add Serper as Scraper Provider and Firecrawl Version Support (#9984)

* 🔧 chore: Update @librechat/agents to v2.4.84 in package.json and package-lock.json

* feat: Add Serper as a new scraperProvider for Web Search and add firecrawlVersion support

* fix: Add firecrawlVersion to TWebSearchKeys and ensure unique API key extraction

* chore: Add build:packages script to streamline package builds
This commit is contained in:
Danny Avila 2025-10-05 20:34:05 -04:00 committed by GitHub
parent 857c054a9a
commit 31a283a4fe
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 247 additions and 47 deletions

View file

@ -80,7 +80,7 @@
"@azure/storage-blob": "^12.27.0",
"@keyv/redis": "^4.3.3",
"@langchain/core": "^0.3.62",
"@librechat/agents": "^2.4.83",
"@librechat/agents": "^2.4.84",
"@librechat/data-schemas": "*",
"@modelcontextprotocol/sdk": "^1.17.1",
"axios": "^1.12.1",

View file

@ -1,11 +1,11 @@
import { webSearchAuth } from '@librechat/data-schemas';
import { SafeSearchTypes, AuthType } from 'librechat-data-provider';
import type {
ScraperTypes,
ScraperProviders,
TWebSearchConfig,
SearchProviders,
TCustomConfig,
RerankerTypes,
SearchProviders,
TWebSearchConfig,
} from 'librechat-data-provider';
import { loadWebSearchAuth, extractWebSearchEnvVars } from './web';
@ -119,7 +119,7 @@ describe('web.ts', () => {
}
expect(result.authResult).toHaveProperty('searchProvider', 'serper');
expect(result.authResult).toHaveProperty('scraperType', 'firecrawl');
expect(result.authResult).toHaveProperty('scraperProvider', 'firecrawl');
expect(['jina', 'cohere']).toContain(result.authResult.rerankerType as string);
});
@ -288,7 +288,7 @@ describe('web.ts', () => {
// Check that the correct service types are set
expect(result.authResult.searchProvider).toBe('serper' as SearchProviders);
expect(result.authResult.scraperType).toBe('firecrawl' as ScraperTypes);
expect(result.authResult.scraperProvider).toBe('firecrawl' as ScraperProviders);
// One of the rerankers should be set
expect(['jina', 'cohere']).toContain(result.authResult.rerankerType as string);
});
@ -330,7 +330,7 @@ describe('web.ts', () => {
// Should have set values for all categories
expect(result.authResult.searchProvider).toBeDefined();
expect(result.authResult.scraperType).toBeDefined();
expect(result.authResult.scraperProvider).toBeDefined();
expect(result.authResult.rerankerType).toBeDefined();
});
@ -359,7 +359,7 @@ describe('web.ts', () => {
safeSearch: SafeSearchTypes.MODERATE,
// Specify which services to use
searchProvider: 'serper' as SearchProviders,
scraperType: 'firecrawl' as ScraperTypes,
scraperProvider: 'firecrawl' as ScraperProviders,
rerankerType: 'jina' as RerankerTypes,
};
@ -394,7 +394,7 @@ describe('web.ts', () => {
expect(result.authResult).toHaveProperty('firecrawlApiUrl');
expect(result.authResult).toHaveProperty('jinaApiKey');
expect(result.authResult).toHaveProperty('searchProvider');
expect(result.authResult).toHaveProperty('scraperType');
expect(result.authResult).toHaveProperty('scraperProvider');
expect(result.authResult).toHaveProperty('rerankerType');
expect(result.authenticated).toBe(true);
@ -419,7 +419,7 @@ describe('web.ts', () => {
expect(result.authResult).toHaveProperty('firecrawlApiUrl', 'https://api.firecrawl.dev');
expect(result.authResult).toHaveProperty('jinaApiKey', 'system-jina-key');
expect(result.authResult).toHaveProperty('searchProvider', 'serper');
expect(result.authResult).toHaveProperty('scraperType', 'firecrawl');
expect(result.authResult).toHaveProperty('scraperProvider', 'firecrawl');
expect(result.authResult).toHaveProperty('rerankerType', 'jina');
// Restore original env
@ -452,7 +452,7 @@ describe('web.ts', () => {
safeSearch: SafeSearchTypes.MODERATE,
// Specify which services to use
searchProvider: 'serper' as SearchProviders,
scraperType: 'firecrawl' as ScraperTypes,
scraperProvider: 'firecrawl' as ScraperProviders,
rerankerType: 'jina' as RerankerTypes, // Only Jina will be checked
};
@ -492,7 +492,7 @@ describe('web.ts', () => {
// Verify the service types are set correctly
expect(result.authResult).toHaveProperty('searchProvider', 'serper');
expect(result.authResult).toHaveProperty('scraperType', 'firecrawl');
expect(result.authResult).toHaveProperty('scraperProvider', 'firecrawl');
expect(result.authResult).toHaveProperty('rerankerType', 'jina');
// Restore original env
@ -722,8 +722,8 @@ describe('web.ts', () => {
expect(providerCalls.length).toBe(1);
});
it('should only check the specified scraperType', async () => {
// Initialize a webSearchConfig with a specific scraperType
it('should only check the specified scraperProvider', async () => {
// Initialize a webSearchConfig with a specific scraperProvider
const webSearchConfig: TCustomConfig['webSearch'] = {
serperApiKey: '${SERPER_API_KEY}',
searxngInstanceUrl: '${SEARXNG_INSTANCE_URL}',
@ -734,7 +734,7 @@ describe('web.ts', () => {
jinaApiUrl: '${JINA_API_URL}',
cohereApiKey: '${COHERE_API_KEY}',
safeSearch: SafeSearchTypes.MODERATE,
scraperType: 'firecrawl' as ScraperTypes,
scraperProvider: 'firecrawl' as ScraperProviders,
};
// Mock successful authentication
@ -754,7 +754,7 @@ describe('web.ts', () => {
});
expect(result.authenticated).toBe(true);
expect(result.authResult.scraperType).toBe('firecrawl');
expect(result.authResult.scraperProvider).toBe('firecrawl');
// Verify that only FIRECRAWL_API_KEY and FIRECRAWL_API_URL were requested for the scrapers category
const scraperCalls = mockLoadAuthValues.mock.calls.filter((call) =>
@ -933,7 +933,7 @@ describe('web.ts', () => {
// Should have set values for all categories
expect(result.authResult.searchProvider).toBeDefined();
expect(result.authResult.scraperType).toBeDefined();
expect(result.authResult.scraperProvider).toBeDefined();
expect(result.authResult.rerankerType).toBeDefined();
});

View file

@ -4,14 +4,14 @@ import {
SearchCategories,
extractVariableName,
} from 'librechat-data-provider';
import { webSearchAuth } from '@librechat/data-schemas';
import type {
ScraperTypes,
RerankerTypes,
TCustomConfig,
SearchProviders,
ScraperProviders,
TWebSearchConfig,
} from 'librechat-data-provider';
import { webSearchAuth } from '@librechat/data-schemas';
import type { TWebSearchKeys, TWebSearchCategories } from '@librechat/data-schemas';
export function extractWebSearchEnvVars({
@ -88,8 +88,8 @@ export async function loadWebSearchAuth({
let specificService: ServiceType | undefined;
if (category === SearchCategories.PROVIDERS && webSearchConfig?.searchProvider) {
specificService = webSearchConfig.searchProvider as unknown as ServiceType;
} else if (category === SearchCategories.SCRAPERS && webSearchConfig?.scraperType) {
specificService = webSearchConfig.scraperType as unknown as ServiceType;
} else if (category === SearchCategories.SCRAPERS && webSearchConfig?.scraperProvider) {
specificService = webSearchConfig.scraperProvider as unknown as ServiceType;
} else if (category === SearchCategories.RERANKERS && webSearchConfig?.rerankerType) {
specificService = webSearchConfig.rerankerType as unknown as ServiceType;
}
@ -165,7 +165,7 @@ export async function loadWebSearchAuth({
if (category === SearchCategories.PROVIDERS) {
authResult.searchProvider = service as SearchProviders;
} else if (category === SearchCategories.SCRAPERS) {
authResult.scraperType = service as ScraperTypes;
authResult.scraperProvider = service as ScraperProviders;
} else if (category === SearchCategories.RERANKERS) {
authResult.rerankerType = service as RerankerTypes;
}

View file

@ -650,7 +650,7 @@ export type TStartupConfig = {
minPasswordLength?: number;
webSearch?: {
searchProvider?: SearchProviders;
scraperType?: ScraperTypes;
scraperProvider?: ScraperProviders;
rerankerType?: RerankerTypes;
};
mcpServers?: Record<
@ -689,7 +689,7 @@ export enum SearchProviders {
SEARXNG = 'searxng',
}
export enum ScraperTypes {
export enum ScraperProviders {
FIRECRAWL = 'firecrawl',
SERPER = 'serper',
}
@ -711,11 +711,12 @@ export const webSearchSchema = z.object({
searxngApiKey: z.string().optional().default('${SEARXNG_API_KEY}'),
firecrawlApiKey: z.string().optional().default('${FIRECRAWL_API_KEY}'),
firecrawlApiUrl: z.string().optional().default('${FIRECRAWL_API_URL}'),
firecrawlVersion: z.string().optional().default('${FIRECRAWL_VERSION}'),
jinaApiKey: z.string().optional().default('${JINA_API_KEY}'),
jinaApiUrl: z.string().optional().default('${JINA_API_URL}'),
cohereApiKey: z.string().optional().default('${COHERE_API_KEY}'),
searchProvider: z.nativeEnum(SearchProviders).optional(),
scraperType: z.nativeEnum(ScraperTypes).optional(),
scraperProvider: z.nativeEnum(ScraperProviders).optional(),
rerankerType: z.nativeEnum(RerankerTypes).optional(),
scraperTimeout: z.number().optional(),
safeSearch: z.nativeEnum(SafeSearchTypes).default(SafeSearchTypes.MODERATE),

View file

@ -0,0 +1,173 @@
import { SafeSearchTypes, SearchProviders, ScraperProviders } from 'librechat-data-provider';
import type { TCustomConfig } from 'librechat-data-provider';
import { loadWebSearchConfig } from './web';
/**
 * Unit tests for `loadWebSearchConfig`.
 *
 * The suite checks two things for every group of fields:
 *  - a value supplied by the caller is returned unchanged, and
 *  - a missing value is replaced by its `${ENV_VAR}` placeholder string
 *    (or `SafeSearchTypes.MODERATE` for `safeSearch`).
 */
describe('loadWebSearchConfig', () => {
  // --- firecrawlVersion -----------------------------------------------------
  describe('firecrawlVersion', () => {
    it('should use provided firecrawlVersion when specified', () => {
      const input: TCustomConfig['webSearch'] = { firecrawlVersion: 'v2' };

      const loaded = loadWebSearchConfig(input);

      expect(loaded?.firecrawlVersion).toBe('v2');
    });

    it('should default to ${FIRECRAWL_VERSION} when not provided', () => {
      const input: TCustomConfig['webSearch'] = {};

      const loaded = loadWebSearchConfig(input);

      expect(loaded?.firecrawlVersion).toBe('${FIRECRAWL_VERSION}');
    });

    it('should default to ${FIRECRAWL_VERSION} when config is undefined', () => {
      // No config object at all: the placeholder is still expected.
      const loaded = loadWebSearchConfig(undefined);

      expect(loaded?.firecrawlVersion).toBe('${FIRECRAWL_VERSION}');
    });

    it('should preserve custom firecrawlVersion value', () => {
      const input: TCustomConfig['webSearch'] = { firecrawlVersion: 'v1' };

      const loaded = loadWebSearchConfig(input);

      expect(loaded?.firecrawlVersion).toBe('v1');
    });
  });

  // --- whole-object behaviour -----------------------------------------------
  describe('all config fields', () => {
    it('should apply defaults for all fields when config is empty', () => {
      const input: TCustomConfig['webSearch'] = {};

      const loaded = loadWebSearchConfig(input);

      // Exact equality: an empty input must yield exactly these keys.
      expect(loaded).toEqual({
        serperApiKey: '${SERPER_API_KEY}',
        searxngInstanceUrl: '${SEARXNG_INSTANCE_URL}',
        searxngApiKey: '${SEARXNG_API_KEY}',
        firecrawlApiKey: '${FIRECRAWL_API_KEY}',
        firecrawlApiUrl: '${FIRECRAWL_API_URL}',
        firecrawlVersion: '${FIRECRAWL_VERSION}',
        jinaApiKey: '${JINA_API_KEY}',
        jinaApiUrl: '${JINA_API_URL}',
        cohereApiKey: '${COHERE_API_KEY}',
        safeSearch: SafeSearchTypes.MODERATE,
      });
    });

    it('should preserve provided config values and merge with defaults', () => {
      const input: TCustomConfig['webSearch'] = {
        serperApiKey: 'custom-serper-key',
        firecrawlApiKey: 'custom-firecrawl-key',
        firecrawlVersion: 'v2',
        safeSearch: SafeSearchTypes.STRICT,
      };

      const loaded = loadWebSearchConfig(input);

      // Caller-supplied values are kept...
      expect(loaded?.serperApiKey).toBe('custom-serper-key');
      expect(loaded?.firecrawlApiKey).toBe('custom-firecrawl-key');
      expect(loaded?.firecrawlVersion).toBe('v2');
      expect(loaded?.safeSearch).toBe(SafeSearchTypes.STRICT);
      // ...and an omitted field still receives its placeholder.
      expect(loaded?.jinaApiKey).toBe('${JINA_API_KEY}');
    });

    it('should preserve additional fields from input config', () => {
      const input: TCustomConfig['webSearch'] = {
        serperApiKey: 'test-key',
        scraperProvider: ScraperProviders.SERPER,
        searchProvider: SearchProviders.SERPER,
      };

      const loaded = loadWebSearchConfig(input);

      expect(loaded?.scraperProvider).toBe('serper');
      expect(loaded?.searchProvider).toBe('serper');
      expect(loaded?.serperApiKey).toBe('test-key');
    });
  });

  // --- safeSearch -----------------------------------------------------------
  describe('safeSearch', () => {
    it('should default to MODERATE when not provided', () => {
      const input: TCustomConfig['webSearch'] = {};

      const loaded = loadWebSearchConfig(input);

      expect(loaded?.safeSearch).toBe(SafeSearchTypes.MODERATE);
    });

    it('should preserve OFF value', () => {
      const input: TCustomConfig['webSearch'] = { safeSearch: SafeSearchTypes.OFF };

      const loaded = loadWebSearchConfig(input);

      expect(loaded?.safeSearch).toBe(SafeSearchTypes.OFF);
    });

    it('should preserve STRICT value', () => {
      const input: TCustomConfig['webSearch'] = { safeSearch: SafeSearchTypes.STRICT };

      const loaded = loadWebSearchConfig(input);

      expect(loaded?.safeSearch).toBe(SafeSearchTypes.STRICT);
    });
  });

  // --- API keys -------------------------------------------------------------
  describe('API keys', () => {
    it('should apply default placeholders for all API keys', () => {
      const loaded = loadWebSearchConfig({});

      expect(loaded?.serperApiKey).toBe('${SERPER_API_KEY}');
      expect(loaded?.searxngApiKey).toBe('${SEARXNG_API_KEY}');
      expect(loaded?.firecrawlApiKey).toBe('${FIRECRAWL_API_KEY}');
      expect(loaded?.jinaApiKey).toBe('${JINA_API_KEY}');
      expect(loaded?.cohereApiKey).toBe('${COHERE_API_KEY}');
    });

    it('should preserve custom API keys', () => {
      const input: TCustomConfig['webSearch'] = {
        serperApiKey: 'actual-serper-key',
        jinaApiKey: 'actual-jina-key',
        cohereApiKey: 'actual-cohere-key',
      };

      const loaded = loadWebSearchConfig(input);

      expect(loaded?.serperApiKey).toBe('actual-serper-key');
      expect(loaded?.jinaApiKey).toBe('actual-jina-key');
      expect(loaded?.cohereApiKey).toBe('actual-cohere-key');
    });
  });

  // --- URLs -----------------------------------------------------------------
  describe('URLs', () => {
    it('should apply default placeholders for URLs', () => {
      const loaded = loadWebSearchConfig({});

      expect(loaded?.searxngInstanceUrl).toBe('${SEARXNG_INSTANCE_URL}');
      expect(loaded?.firecrawlApiUrl).toBe('${FIRECRAWL_API_URL}');
      expect(loaded?.jinaApiUrl).toBe('${JINA_API_URL}');
    });

    it('should preserve custom URLs', () => {
      const input: TCustomConfig['webSearch'] = {
        searxngInstanceUrl: 'https://custom-searxng.com',
        firecrawlApiUrl: 'https://custom-firecrawl.com',
        jinaApiUrl: 'https://custom-jina.com',
      };

      const loaded = loadWebSearchConfig(input);

      expect(loaded?.searxngInstanceUrl).toBe('https://custom-searxng.com');
      expect(loaded?.firecrawlApiUrl).toBe('https://custom-firecrawl.com');
      expect(loaded?.jinaApiUrl).toBe('https://custom-jina.com');
    });
  });
});

View file

@ -18,6 +18,10 @@ export const webSearchAuth = {
firecrawlApiKey: 1 as const,
/** Optional (0) */
firecrawlApiUrl: 0 as const,
firecrawlVersion: 0 as const,
},
serper: {
serperApiKey: 1 as const,
},
},
rerankers: {
@ -31,10 +35,10 @@ export const webSearchAuth = {
};
/**
* Extracts all API keys from the webSearchAuth configuration object
* Extracts all unique API keys from the webSearchAuth configuration object
*/
export function getWebSearchKeys(): TWebSearchKeys[] {
const keys: TWebSearchKeys[] = [];
const keysSet = new Set<TWebSearchKeys>();
// Iterate through each category (providers, scrapers, rerankers)
for (const category of Object.keys(webSearchAuth)) {
@ -44,14 +48,14 @@ export function getWebSearchKeys(): TWebSearchKeys[] {
for (const service of Object.keys(categoryObj)) {
const serviceObj = categoryObj[service as keyof typeof categoryObj];
// Extract the API keys from the service
// Extract the API keys from the service and add to set for deduplication
for (const key of Object.keys(serviceObj)) {
keys.push(key as TWebSearchKeys);
keysSet.add(key as TWebSearchKeys);
}
}
}
return keys;
return Array.from(keysSet);
}
export const webSearchKeys: TWebSearchKeys[] = getWebSearchKeys();
@ -64,6 +68,7 @@ export function loadWebSearchConfig(
const searxngApiKey = config?.searxngApiKey ?? '${SEARXNG_API_KEY}';
const firecrawlApiKey = config?.firecrawlApiKey ?? '${FIRECRAWL_API_KEY}';
const firecrawlApiUrl = config?.firecrawlApiUrl ?? '${FIRECRAWL_API_URL}';
const firecrawlVersion = config?.firecrawlVersion ?? '${FIRECRAWL_VERSION}';
const jinaApiKey = config?.jinaApiKey ?? '${JINA_API_KEY}';
const jinaApiUrl = config?.jinaApiUrl ?? '${JINA_API_URL}';
const cohereApiKey = config?.cohereApiKey ?? '${COHERE_API_KEY}';
@ -76,9 +81,10 @@ export function loadWebSearchConfig(
jinaApiUrl,
cohereApiKey,
serperApiKey,
searxngInstanceUrl,
searxngApiKey,
firecrawlApiKey,
firecrawlApiUrl,
firecrawlVersion,
searxngInstanceUrl,
};
}

View file

@ -6,6 +6,7 @@ export type TWebSearchKeys =
| 'searxngApiKey'
| 'firecrawlApiKey'
| 'firecrawlApiUrl'
| 'firecrawlVersion'
| 'jinaApiKey'
| 'jinaApiUrl'
| 'cohereApiKey';