From 1312cd757c3e2634d59a73fb96f30dcd4c0d17e3 Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Sun, 15 Mar 2026 18:05:08 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=9B=A1=EF=B8=8F=20fix:=20Validate=20User-?= =?UTF-8?q?provided=20URLs=20for=20Web=20Search=20(#12247)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🛡️ fix: SSRF-validate user-provided URLs in web search auth User-controlled URL fields (jinaApiUrl, firecrawlApiUrl, searxngInstanceUrl) flow from plugin auth into outbound HTTP requests without validation. Reuse existing isSSRFTarget/resolveHostnameSSRF to block private/internal targets while preserving admin-configured (env var) internal URLs. * 🛡️ fix: Harden web search SSRF validation - Reject non-HTTP(S) schemes (file://, ftp://, etc.) in isSSRFUrl - Conditional write: only assign to authResult after SSRF check passes - Move isUserProvided tracking after SSRF gate to avoid false positives - Add authenticated assertions for optional-field SSRF blocks in tests - Add file:// scheme rejection test - Wrap process.env mutation in try/finally guard - Add JSDoc + sync-obligation comment on WEB_SEARCH_URL_KEYS * 🛡️ fix: Correct auth-type reporting for SSRF-stripped optional URLs SSRF-stripped optional URL fields no longer pollute isUserProvided. Track whether the field actually contributed to authResult before crediting it as user-provided, so categories report SYSTEM_DEFINED when all surviving values match env vars. 
--- packages/api/src/web/web.spec.ts | 360 +++++++++++++++++++++++++++++++ packages/api/src/web/web.ts | 50 ++++- 2 files changed, 408 insertions(+), 2 deletions(-) diff --git a/packages/api/src/web/web.spec.ts b/packages/api/src/web/web.spec.ts index c7bb3f4962..74e02b20ef 100644 --- a/packages/api/src/web/web.spec.ts +++ b/packages/api/src/web/web.spec.ts @@ -18,6 +18,14 @@ jest.mock('../utils', () => ({ }, })); +const mockIsSSRFTarget = jest.fn().mockReturnValue(false); +const mockResolveHostnameSSRF = jest.fn().mockResolvedValue(false); + +jest.mock('../auth', () => ({ + isSSRFTarget: (...args: unknown[]) => mockIsSSRFTarget(...args), + resolveHostnameSSRF: (...args: unknown[]) => mockResolveHostnameSSRF(...args), +})); + describe('web.ts', () => { describe('extractWebSearchEnvVars', () => { it('should return empty array if config is undefined', () => { @@ -1227,4 +1235,356 @@ describe('web.ts', () => { expect(result.authResult.firecrawlOptions).toBeUndefined(); // Should be undefined }); }); + + describe('SSRF protection for user-provided URLs', () => { + const userId = 'test-user-id'; + let mockLoadAuthValues: jest.Mock; + + beforeEach(() => { + jest.clearAllMocks(); + mockLoadAuthValues = jest.fn(); + mockIsSSRFTarget.mockReturnValue(false); + mockResolveHostnameSSRF.mockResolvedValue(false); + }); + + it('should block user-provided jinaApiUrl targeting localhost', async () => { + mockIsSSRFTarget.mockImplementation((hostname: string) => hostname === 'localhost'); + + const webSearchConfig: TCustomConfig['webSearch'] = { + serperApiKey: '${SERPER_API_KEY}', + firecrawlApiKey: '${FIRECRAWL_API_KEY}', + jinaApiKey: '${JINA_API_KEY}', + jinaApiUrl: '${JINA_API_URL}', + safeSearch: SafeSearchTypes.MODERATE, + rerankerType: 'jina' as RerankerTypes, + }; + + mockLoadAuthValues.mockImplementation(({ authFields }) => { + const result: Record<string, string> = {}; + authFields.forEach((field: string) => { + if (field === 'JINA_API_URL') { + result[field] = 
'http://localhost:8080/rerank'; + } else { + result[field] = 'test-api-key'; + } + }); + return Promise.resolve(result); + }); + + const result = await loadWebSearchAuth({ + userId, + webSearchConfig, + loadAuthValues: mockLoadAuthValues, + }); + + expect(result.authResult.jinaApiUrl).toBeUndefined(); + expect(mockIsSSRFTarget).toHaveBeenCalledWith('localhost'); + }); + + it('should block user-provided firecrawlApiUrl resolving to private IP', async () => { + mockResolveHostnameSSRF.mockImplementation((hostname: string) => + Promise.resolve(hostname === 'evil.internal-service.com'), + ); + + const webSearchConfig: TCustomConfig['webSearch'] = { + serperApiKey: '${SERPER_API_KEY}', + firecrawlApiKey: '${FIRECRAWL_API_KEY}', + firecrawlApiUrl: '${FIRECRAWL_API_URL}', + jinaApiKey: '${JINA_API_KEY}', + safeSearch: SafeSearchTypes.MODERATE, + scraperProvider: 'firecrawl' as ScraperProviders, + }; + + mockLoadAuthValues.mockImplementation(({ authFields }) => { + const result: Record<string, string> = {}; + authFields.forEach((field: string) => { + if (field === 'FIRECRAWL_API_URL') { + result[field] = 'https://evil.internal-service.com/scrape'; + } else { + result[field] = 'test-api-key'; + } + }); + return Promise.resolve(result); + }); + + const result = await loadWebSearchAuth({ + userId, + webSearchConfig, + loadAuthValues: mockLoadAuthValues, + }); + + expect(result.authResult.firecrawlApiUrl).toBeUndefined(); + expect(result.authenticated).toBe(true); + const scrapersAuth = result.authTypes.find(([c]) => c === 'scrapers')?.[1]; + expect(scrapersAuth).toBe(AuthType.USER_PROVIDED); + }); + + it('should block user-provided searxngInstanceUrl targeting metadata endpoint', async () => { + mockIsSSRFTarget.mockImplementation((hostname: string) => hostname === '169.254.169.254'); + + const webSearchConfig: TCustomConfig['webSearch'] = { + searxngInstanceUrl: '${SEARXNG_INSTANCE_URL}', + firecrawlApiKey: '${FIRECRAWL_API_KEY}', + jinaApiKey: '${JINA_API_KEY}', + safeSearch: 
SafeSearchTypes.MODERATE, + searchProvider: 'searxng' as SearchProviders, + }; + + mockLoadAuthValues.mockImplementation(({ authFields }) => { + const result: Record<string, string> = {}; + authFields.forEach((field: string) => { + if (field === 'SEARXNG_INSTANCE_URL') { + result[field] = 'http://169.254.169.254/latest/meta-data'; + } else { + result[field] = 'test-api-key'; + } + }); + return Promise.resolve(result); + }); + + const result = await loadWebSearchAuth({ + userId, + webSearchConfig, + loadAuthValues: mockLoadAuthValues, + }); + + expect(result.authResult.searxngInstanceUrl).toBeUndefined(); + expect(result.authenticated).toBe(false); + }); + + it('should allow system-defined URLs even if they match SSRF patterns', async () => { + mockIsSSRFTarget.mockReturnValue(true); + + const originalEnv = process.env; + try { + process.env = { + ...originalEnv, + JINA_API_KEY: 'system-jina-key', + JINA_API_URL: 'http://jina-internal:8080/rerank', + }; + + const webSearchConfig: TCustomConfig['webSearch'] = { + serperApiKey: '${SERPER_API_KEY}', + firecrawlApiKey: '${FIRECRAWL_API_KEY}', + jinaApiKey: '${JINA_API_KEY}', + jinaApiUrl: '${JINA_API_URL}', + safeSearch: SafeSearchTypes.MODERATE, + rerankerType: 'jina' as RerankerTypes, + }; + + mockLoadAuthValues.mockImplementation(({ authFields }) => { + const result: Record<string, string> = {}; + authFields.forEach((field: string) => { + if (field === 'JINA_API_KEY') { + result[field] = 'system-jina-key'; + } else if (field === 'JINA_API_URL') { + result[field] = 'http://jina-internal:8080/rerank'; + } else { + result[field] = 'test-api-key'; + } + }); + return Promise.resolve(result); + }); + + const result = await loadWebSearchAuth({ + userId, + webSearchConfig, + loadAuthValues: mockLoadAuthValues, + }); + + expect(result.authResult.jinaApiUrl).toBe('http://jina-internal:8080/rerank'); + expect(result.authenticated).toBe(true); + } finally { + process.env = originalEnv; + } + }); + + it('should reject URLs with invalid 
format', async () => { + 
const webSearchConfig: TCustomConfig['webSearch'] = { + serperApiKey: '${SERPER_API_KEY}', + firecrawlApiKey: '${FIRECRAWL_API_KEY}', + firecrawlApiUrl: '${FIRECRAWL_API_URL}', + jinaApiKey: '${JINA_API_KEY}', + safeSearch: SafeSearchTypes.MODERATE, + scraperProvider: 'firecrawl' as ScraperProviders, + }; + + mockLoadAuthValues.mockImplementation(({ authFields }) => { + const result: Record<string, string> = {}; + authFields.forEach((field: string) => { + if (field === 'FIRECRAWL_API_URL') { + result[field] = 'not-a-valid-url'; + } else { + result[field] = 'test-api-key'; + } + }); + return Promise.resolve(result); + }); + + const result = await loadWebSearchAuth({ + userId, + webSearchConfig, + loadAuthValues: mockLoadAuthValues, + }); + + expect(result.authResult.firecrawlApiUrl).toBeUndefined(); + expect(result.authenticated).toBe(true); + const scrapersAuth = result.authTypes.find(([c]) => c === 'scrapers')?.[1]; + expect(scrapersAuth).toBe(AuthType.USER_PROVIDED); + }); + + it('should reject non-HTTP schemes like file://', async () => { + const webSearchConfig: TCustomConfig['webSearch'] = { + serperApiKey: '${SERPER_API_KEY}', + firecrawlApiKey: '${FIRECRAWL_API_KEY}', + firecrawlApiUrl: '${FIRECRAWL_API_URL}', + jinaApiKey: '${JINA_API_KEY}', + safeSearch: SafeSearchTypes.MODERATE, + scraperProvider: 'firecrawl' as ScraperProviders, + }; + + mockLoadAuthValues.mockImplementation(({ authFields }) => { + const result: Record<string, string> = {}; + authFields.forEach((field: string) => { + if (field === 'FIRECRAWL_API_URL') { + result[field] = 'file:///etc/passwd'; + } else { + result[field] = 'test-api-key'; + } + }); + return Promise.resolve(result); + }); + + const result = await loadWebSearchAuth({ + userId, + webSearchConfig, + loadAuthValues: mockLoadAuthValues, + }); + + expect(result.authResult.firecrawlApiUrl).toBeUndefined(); + expect(result.authenticated).toBe(true); + }); + + it('should allow legitimate external URLs', async () => { + const 
webSearchConfig: 
TCustomConfig['webSearch'] = { + serperApiKey: '${SERPER_API_KEY}', + firecrawlApiKey: '${FIRECRAWL_API_KEY}', + firecrawlApiUrl: '${FIRECRAWL_API_URL}', + jinaApiKey: '${JINA_API_KEY}', + jinaApiUrl: '${JINA_API_URL}', + safeSearch: SafeSearchTypes.MODERATE, + scraperProvider: 'firecrawl' as ScraperProviders, + rerankerType: 'jina' as RerankerTypes, + }; + + mockLoadAuthValues.mockImplementation(({ authFields }) => { + const result: Record<string, string> = {}; + authFields.forEach((field: string) => { + if (field === 'FIRECRAWL_API_URL') { + result[field] = 'https://api.firecrawl.dev'; + } else if (field === 'JINA_API_URL') { + result[field] = 'https://api.jina.ai/v1/rerank'; + } else { + result[field] = 'test-api-key'; + } + }); + return Promise.resolve(result); + }); + + const result = await loadWebSearchAuth({ + userId, + webSearchConfig, + loadAuthValues: mockLoadAuthValues, + }); + + expect(result.authResult.firecrawlApiUrl).toBe('https://api.firecrawl.dev'); + expect(result.authResult.jinaApiUrl).toBe('https://api.jina.ai/v1/rerank'); + expect(result.authenticated).toBe(true); + }); + + it('should fail required URL field and mark category unauthenticated', async () => { + mockIsSSRFTarget.mockImplementation((hostname: string) => hostname === '127.0.0.1'); + + const webSearchConfig: TCustomConfig['webSearch'] = { + searxngInstanceUrl: '${SEARXNG_INSTANCE_URL}', + searxngApiKey: '${SEARXNG_API_KEY}', + firecrawlApiKey: '${FIRECRAWL_API_KEY}', + jinaApiKey: '${JINA_API_KEY}', + safeSearch: SafeSearchTypes.MODERATE, + searchProvider: 'searxng' as SearchProviders, + }; + + mockLoadAuthValues.mockImplementation(({ authFields }) => { + const result: Record<string, string> = {}; + authFields.forEach((field: string) => { + if (field === 'SEARXNG_INSTANCE_URL') { + result[field] = 'http://127.0.0.1:8888/search'; + } else { + result[field] = 'test-api-key'; + } + }); + return Promise.resolve(result); + }); + + const result = await loadWebSearchAuth({ + userId, + 
webSearchConfig, + loadAuthValues: 
mockLoadAuthValues, + }); + + expect(result.authenticated).toBe(false); + const providersAuthType = result.authTypes.find( + ([category]) => category === 'providers', + )?.[1]; + expect(providersAuthType).toBe(AuthType.USER_PROVIDED); + }); + + it('should report SYSTEM_DEFINED when only user-provided field is a stripped SSRF URL', async () => { + mockIsSSRFTarget.mockImplementation((hostname: string) => hostname === 'localhost'); + + const originalEnv = process.env; + try { + process.env = { + ...originalEnv, + JINA_API_KEY: 'system-jina-key', + }; + + const webSearchConfig: TCustomConfig['webSearch'] = { + serperApiKey: '${SERPER_API_KEY}', + firecrawlApiKey: '${FIRECRAWL_API_KEY}', + jinaApiKey: '${JINA_API_KEY}', + jinaApiUrl: '${JINA_API_URL}', + safeSearch: SafeSearchTypes.MODERATE, + rerankerType: 'jina' as RerankerTypes, + }; + + mockLoadAuthValues.mockImplementation(({ authFields }) => { + const result: Record<string, string> = {}; + authFields.forEach((field: string) => { + if (field === 'JINA_API_KEY') { + result[field] = 'system-jina-key'; + } else if (field === 'JINA_API_URL') { + result[field] = 'http://localhost:9999/rerank'; + } else { + result[field] = 'test-api-key'; + } + }); + return Promise.resolve(result); + }); + + const result = await loadWebSearchAuth({ + userId, + webSearchConfig, + loadAuthValues: mockLoadAuthValues, + }); + + expect(result.authResult.jinaApiUrl).toBeUndefined(); + expect(result.authenticated).toBe(true); + const rerankersAuth = result.authTypes.find(([c]) => c === 'rerankers')?.[1]; + expect(rerankersAuth).toBe(AuthType.SYSTEM_DEFINED); + } finally { + process.env = originalEnv; + } + }); + }); }); diff --git a/packages/api/src/web/web.ts b/packages/api/src/web/web.ts index ad172e187f..cc0d8688ca 100644 --- a/packages/api/src/web/web.ts +++ b/packages/api/src/web/web.ts @@ -13,6 +13,37 @@ import type { TWebSearchConfig, } from 'librechat-data-provider'; import type { TWebSearchKeys, TWebSearchCategories } from 
'@librechat/data-schemas'; 
+import { isSSRFTarget, resolveHostnameSSRF } from '../auth'; + +/** + * URL-type keys in TWebSearchKeys (not API keys or version strings). + * Must stay in sync with URL-typed fields in webSearchAuth (packages/data-schemas). + */ +const WEB_SEARCH_URL_KEYS = new Set<TWebSearchKeys>([ + 'searxngInstanceUrl', + 'firecrawlApiUrl', + 'jinaApiUrl', +]); + +/** + * Returns true if the URL should be blocked for SSRF risk. + * Fail-closed: unparseable URLs and non-HTTP(S) schemes return true. + */ +async function isSSRFUrl(url: string): Promise<boolean> { + let parsed: URL; + try { + parsed = new URL(url); + } catch { + return true; + } + if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') { + return true; + } + if (isSSRFTarget(parsed.hostname)) { + return true; + } + return resolveHostnameSSRF(parsed.hostname); +} export function extractWebSearchEnvVars({ keys, @@ -149,12 +180,27 @@ export async function loadWebSearchAuth({ const field = allAuthFields[j]; const value = authValues[field]; const originalKey = allKeys[j]; - if (originalKey) authResult[originalKey] = value; + if (!optionalSet.has(field) && !value) { allFieldsAuthenticated = false; break; } - if (!isUserProvided && process.env[field] !== value) { + + const isFieldUserProvided = value != null && process.env[field] !== value; + const isUrlKey = originalKey != null && WEB_SEARCH_URL_KEYS.has(originalKey); + let contributed = false; + + if (isUrlKey && isFieldUserProvided && (await isSSRFUrl(value))) { + if (!optionalSet.has(field)) { + allFieldsAuthenticated = false; + break; + } + } else if (originalKey) { + authResult[originalKey] = value; + contributed = true; + } + + if (!isUserProvided && isFieldUserProvided && contributed) { isUserProvided = true; } }