diff --git a/docs/docs/configuration/tools/agent-browser.mdx b/docs/docs/configuration/tools/agent-browser.mdx index a09b64576c..10f0e3701d 100644 --- a/docs/docs/configuration/tools/agent-browser.mdx +++ b/docs/docs/configuration/tools/agent-browser.mdx @@ -53,7 +53,7 @@ Every interactive element gets a unique `@e1`, `@e2`, `@e3`… reference that th -Add to your `docker-compose.override.yml`: +Add to your `docker-compose.override.yaml`: ```yaml services: @@ -64,8 +64,8 @@ services: - PORT=8932 # Optional: path to a specific Chromium binary # - CHROMIUM_PATH=/usr/bin/chromium - ports: - - "8932:8932" + # Internal Docker network only — do not expose publicly without auth + # For local dev, uncomment: ports: ["127.0.0.1:8932:8932"] restart: unless-stopped ``` @@ -88,25 +88,21 @@ The server listens on `http://localhost:8932` by default. Set `PORT` to override ### Configure librechat.yaml -Add the server to `mcpServers` in your `librechat.yaml`: +Add the server to your `librechat.yaml`: ```yaml +# Allow the MCP client to reach the agent-browser server +mcpSettings: + allowedDomains: + - http://agent-browser-mcp:8932 + - http://localhost:8932 + mcpServers: agent-browser: type: sse url: http://agent-browser-mcp:8932/sse - # Adjust the URL for local/non-Docker setups: + # For local/non-Docker setups: # url: http://localhost:8932/sse - autoApprove: - - navigate - - snapshot - - click - - fill - - get_text - - press_key - - screenshot - - get_url - - close_browser ``` @@ -118,6 +114,15 @@ mcpServers: | `PORT` | `8932` | HTTP port the MCP server listens on | | `CHROMIUM_PATH` | _(Playwright managed)_ | Path to a custom Chromium binary | +### Security + + + **SSRF protection:** The `navigate` tool validates URLs and rejects requests to private + IP ranges (10.x, 192.168.x, 172.16-31.x, 127.x, 169.254.x) and internal hostnames + (localhost, .local, .internal). For internal/homelab use, fork the server and adjust + the `isAllowedUrl()` function in `src/server.ts`. 
+ + ## Implementation reference If you are building your own MCP SSE server or extending this one, the following pattern is critical. @@ -171,9 +176,18 @@ function buildMcpServer(): McpServer { server.tool( "navigate", "Navigate the browser to a URL. Returns the page title.", - { url: z.string().describe("Full URL including https://") }, + { + url: z + .string() + .url() + .refine(isAllowedUrl, { + message: + "URL must use http/https and must not point to private, loopback, or link-local addresses.", + }) + .describe("Full URL including https://"), + }, async ({ url }) => { - // ... call agent-browser BrowserManager + // ... call agent-browser BrowserManager with a validated, external URL return { content: [{ type: "text", text: `Navigated to: ${title}` }] }; } ); @@ -200,6 +214,6 @@ function buildMcpServer(): McpServer { ## Related -- [MCP Server configuration reference](/docs/configuration/librechat_yaml/object_structure/mcp_servers) +- [MCP Server configuration](https://www.librechat.ai/docs/configuration/librechat_yaml/object_structure/mcp_servers) - [Vercel `agent-browser` npm package](https://www.npmjs.com/package/agent-browser) - [Model Context Protocol SDK](https://github.com/modelcontextprotocol/typescript-sdk) diff --git a/packages/mcp-servers/agent-browser/.env.example b/packages/mcp-servers/agent-browser/.env.example new file mode 100644 index 0000000000..9696a76d34 --- /dev/null +++ b/packages/mcp-servers/agent-browser/.env.example @@ -0,0 +1,9 @@ +PORT=8932 +CHROMIUM_PATH=/usr/bin/chromium + +# Optional: Perplexica web search integration +# PERPLEXICA_URL=http://perplexica:3001 +# PERPLEXICA_CHAT_PROVIDER=openai +# PERPLEXICA_CHAT_MODEL=gpt-4 +# PERPLEXICA_EMBED_PROVIDER=ollama-embeddings +# PERPLEXICA_EMBED_MODEL=nomic-embed-text:latest diff --git a/packages/mcp-servers/agent-browser/Dockerfile b/packages/mcp-servers/agent-browser/Dockerfile new file mode 100644 index 0000000000..c373ef1957 --- /dev/null +++ 
b/packages/mcp-servers/agent-browser/Dockerfile @@ -0,0 +1,42 @@ +FROM node:22-slim AS builder + +WORKDIR /app +COPY package.json tsconfig.json ./ +RUN npm install +COPY src/ src/ +RUN npm run build + +FROM node:22-slim + +# Install Chromium dependencies for Playwright +RUN apt-get update && apt-get install -y --no-install-recommends \ + chromium \ + fonts-liberation \ + libasound2 \ + libatk-bridge2.0-0 \ + libatk1.0-0 \ + libcups2 \ + libdbus-1-3 \ + libdrm2 \ + libgbm1 \ + libgtk-3-0 \ + libnspr4 \ + libnss3 \ + libx11-xcb1 \ + libxcomposite1 \ + libxdamage1 \ + libxrandr2 \ + xdg-utils \ + && rm -rf /var/lib/apt/lists/* + +RUN groupadd -r appuser && useradd -r -g appuser -d /app appuser +WORKDIR /app +COPY --from=builder /app/dist dist/ +COPY --from=builder /app/node_modules node_modules/ +COPY package.json ./ + +ENV CHROMIUM_PATH=/usr/bin/chromium +USER appuser +EXPOSE 8932 + +CMD ["node", "dist/server.js"] diff --git a/packages/mcp-servers/agent-browser/README.md b/packages/mcp-servers/agent-browser/README.md new file mode 100644 index 0000000000..57999b61e9 --- /dev/null +++ b/packages/mcp-servers/agent-browser/README.md @@ -0,0 +1,42 @@ +# @librechat/mcp-agent-browser + +Vercel [agent-browser](https://github.com/vercel-labs/agent-browser) wrapped as an MCP SSE server for LibreChat. + +Uses Playwright with AI-optimised accessibility tree `@ref` snapshots — significantly better than raw CSS selectors for LLM-driven browser automation. 
+ +## Tools + +| Tool | Description | +| --- | --- | +| `navigate` | Navigate to a URL (SSRF-protected) | +| `snapshot` | Get accessibility snapshot with `@ref` identifiers | +| `click` | Click element by `@ref` or CSS selector | +| `fill` | Fill form input by `@ref` or CSS selector | +| `get_text` | Get text content of an element | +| `press_key` | Press a keyboard key | +| `screenshot` | Take page screenshot | +| `get_url` | Get current URL | +| `close_browser` | Close browser session | +| `perplexica_search` | *(Optional)* Web search via Perplexica | + +## Quick Start + +```bash +docker build -t agent-browser-mcp . +docker run -p 8932:8932 agent-browser-mcp +``` + +## LibreChat Configuration + +```yaml +mcpServers: + agent-browser: + type: sse + url: http://agent-browser-mcp:8932/sse +``` + +## Security + +- **SSRF protection**: The `navigate` tool rejects private IPs (10.x, 192.168.x, 172.16-31.x, 127.x, 169.254.x) and internal hostnames. +- Runs as non-root `appuser` in Docker. +- No `express.json()` middleware — see source comments for explanation. 
diff --git a/packages/mcp-servers/agent-browser/package.json b/packages/mcp-servers/agent-browser/package.json
new file mode 100644
index 0000000000..8152aa9261
--- /dev/null
+++ b/packages/mcp-servers/agent-browser/package.json
@@ -0,0 +1,24 @@
+{
+  "name": "@librechat/mcp-agent-browser",
+  "version": "1.0.0",
+  "description": "Vercel agent-browser MCP SSE server for LibreChat — Playwright-based browser automation with AI-optimised @ref snapshots",
+  "type": "module",
+  "main": "dist/server.js",
+  "scripts": {
+    "build": "tsc",
+    "start": "node dist/server.js",
+    "dev": "tsx src/server.ts"
+  },
+  "dependencies": {
+    "@modelcontextprotocol/sdk": "^1.0.0",
+    "agent-browser": "^0.16.0",
+    "express": "^4.21.0",
+    "zod": "^3.23.0"
+  },
+  "devDependencies": {
+    "@types/express": "^4.17.21",
+    "@types/node": "^22.0.0",
+    "tsx": "^4.19.0",
+    "typescript": "^5.6.0"
+  }
+}
diff --git a/packages/mcp-servers/agent-browser/src/server.ts b/packages/mcp-servers/agent-browser/src/server.ts
new file mode 100644
index 0000000000..64a77ea66e
--- /dev/null
+++ b/packages/mcp-servers/agent-browser/src/server.ts
@@ -0,0 +1,359 @@
+import express, { Request, Response } from "express";
+import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js";
+import { z } from "zod";
+import { BrowserManager } from "agent-browser/dist/browser.js";
+import { executeCommand } from "agent-browser/dist/actions.js";
+
+const PORT = parseInt(process.env.PORT ?? "8932", 10);
+const CHROMIUM_PATH = process.env.CHROMIUM_PATH ?? "";
+
+// Optional Perplexica integration — only enabled if PERPLEXICA_URL is set
+const PERPLEXICA_URL = process.env.PERPLEXICA_URL ?? "";
+const PERPLEXICA_CHAT_PROVIDER = process.env.PERPLEXICA_CHAT_PROVIDER ?? "";
+const PERPLEXICA_CHAT_MODEL = process.env.PERPLEXICA_CHAT_MODEL ?? "";
+const PERPLEXICA_EMBED_PROVIDER = process.env.PERPLEXICA_EMBED_PROVIDER ?? "";
+const PERPLEXICA_EMBED_MODEL = process.env.PERPLEXICA_EMBED_MODEL ?? "";
+
+// One browser instance is shared by every MCP session this process serves.
+let browser: BrowserManager | null = null;
+let cmdId = 0;
+const nextId = () => `c${++cmdId}`;
+
+/**
+ * Returns the shared BrowserManager, launching a headless browser first when
+ * none is currently launched.
+ *
+ * @throws Error when the launch command does not report success.
+ */
+async function getBrowser(): Promise<BrowserManager> {
+  if (!browser?.isLaunched()) {
+    browser = new BrowserManager();
+    const launchCmd: Record<string, unknown> = { id: nextId(), action: "launch", headless: true };
+    if (CHROMIUM_PATH) launchCmd.executablePath = CHROMIUM_PATH;
+    const resp = await executeCommand(launchCmd as any, browser);
+    if (!resp.success) throw new Error(`Browser launch failed: ${(resp as any).error}`);
+  }
+  return browser;
+}
+
+/**
+ * Runs one agent-browser command against the shared browser.
+ *
+ * @param command Command fields; an `id` is generated unless the caller supplies one.
+ * @returns The response's `data` field, cast to `T` without runtime validation.
+ * @throws Error when the response does not report success.
+ */
+async function cmd<T = unknown>(command: Record<string, unknown>): Promise<T> {
+  const b = await getBrowser();
+  const resp = await executeCommand({ id: nextId(), ...command } as any, b);
+  if (!resp.success) throw new Error((resp as any).error ?? "Command failed");
+  return (resp as any).data as T;
+}
+
+// --- SSRF Protection ---
+/**
+ * True for names that refer to the local machine or a private DNS zone:
+ * `localhost`, `*.localhost`, `ip6-localhost`, `*.local` and `*.internal`.
+ * A trailing root dot ("localhost.") is stripped first so it cannot be used
+ * to slip past the comparison.
+ */
+const isPrivateHostname = (hostname: string): boolean => {
+  const lower = hostname.toLowerCase().replace(/\.$/, "");
+  return (
+    lower === "localhost" ||
+    lower === "ip6-localhost" ||
+    lower.endsWith(".localhost") ||
+    lower.endsWith(".local") ||
+    lower.endsWith(".internal")
+  );
+};
+
+/** True when the leading two IPv4 octets select a blocked range. */
+const isPrivateIpv4 = (a: number, b: number): boolean =>
+  a === 0 || // 0.0.0.0/8 ("this host")
+  a === 10 ||
+  a === 127 ||
+  (a === 169 && b === 254) ||
+  (a === 172 && b >= 16 && b <= 31) ||
+  (a === 192 && b === 168);
+
+/**
+ * True when `hostname` is an IP literal in an unspecified, loopback, private
+ * or link-local range.
+ *
+ * `URL.hostname` keeps the square brackets around IPv6 literals ("[::1]"), so
+ * they are stripped before matching; comparing against a bare "::1" never
+ * matched a parsed URL.
+ */
+const isPrivateIp = (hostname: string): boolean => {
+  const host = hostname.toLowerCase().replace(/^\[|\]$/g, "");
+  if (/^\d{1,3}(\.\d{1,3}){3}$/.test(host)) {
+    const [a, b] = host.split(".").map(Number);
+    return isPrivateIpv4(a, b);
+  }
+  if (!host.includes(":")) return false; // not an IPv6 literal
+  if (host === "::" || host === "::1") return true; // unspecified / loopback
+  // IPv4-mapped addresses: URL serialises "::ffff:127.0.0.1" as "::ffff:7f00:1".
+  const mappedHex = /^::ffff:([0-9a-f]{1,4}):[0-9a-f]{1,4}$/.exec(host);
+  if (mappedHex) {
+    const high = parseInt(mappedHex[1], 16);
+    return isPrivateIpv4(high >> 8, high & 0xff);
+  }
+  const mappedDotted = /^::ffff:(\d{1,3}(?:\.\d{1,3}){3})$/.exec(host);
+  if (mappedDotted) return isPrivateIp(mappedDotted[1]);
+  // fc00::/7 (unique local) and fe80::/10 (link-local).
+  return /^f[cd][0-9a-f]{2}:/.test(host) || /^fe[89ab][0-9a-f]:/.test(host);
+};
+
+/**
+ * URL gate for the `navigate` tool: accepts only http/https URLs whose host is
+ * neither a private hostname nor a private IP literal; unparseable input is
+ * rejected.
+ *
+ * NOTE(review): only the URL text is inspected. Public names that resolve to
+ * private addresses, and redirects followed after navigation, are not checked.
+ */
+const isAllowedUrl = (value: string): boolean => {
+  try {
+    const parsed = new URL(value);
+    if (parsed.protocol !== "http:" && parsed.protocol !== "https:") return false;
+    return !isPrivateHostname(parsed.hostname) && !isPrivateIp(parsed.hostname);
+  } catch {
+    return false;
+  }
+};
+
+// --- Optional Perplexica search ---
+/**
+ * Sends `query` to Perplexica's `/api/chat` endpoint and returns the answer text.
+ *
+ * The response body is read as newline-delimited JSON events. For every
+ * `updateBlock` event the latest `/data` replacement per block id is kept, and
+ * the kept values are joined with blank lines.
+ *
+ * @throws Error when Perplexica is not configured, responds with a non-2xx
+ *   status, or emits an `error` event.
+ */
+async function perplexicaChat(query: string): Promise<string> {
+  if (!PERPLEXICA_URL) throw new Error("Perplexica not configured");
+  const messageId = `msg-${Date.now()}`;
+  const chatId = `chat-${Date.now()}`;
+  const body = {
+    message: { messageId, chatId, role: "user", content: query },
+    chatModel: { providerId: PERPLEXICA_CHAT_PROVIDER, key: PERPLEXICA_CHAT_MODEL },
+    embeddingModel: { providerId: PERPLEXICA_EMBED_PROVIDER, key: PERPLEXICA_EMBED_MODEL },
+    sources: ["web"],
+    optimizationMode: "speed",
+    history: [],
+  };
+
+  const resp = await fetch(`${PERPLEXICA_URL}/api/chat`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify(body),
+  });
+  if (!resp.ok) throw new Error(`Perplexica ${resp.status}: ${await resp.text()}`);
+
+  const rawText = await resp.text();
+  const blockValues: Map<string, string> = new Map();
+  for (const line of rawText.split("\n")) {
+    const trimmed = line.trim();
+    if (!trimmed) continue;
+    let event: any;
+    try { event = JSON.parse(trimmed); } catch { continue; } // skip lines that are not JSON
+    if (event.type === "error") throw new Error(event.data ?? "Perplexica error");
+    if (event.type === "updateBlock" && Array.isArray(event.patch)) {
+      for (const patch of event.patch) {
+        if (patch.op === "replace" && patch.path === "/data") {
+          blockValues.set(event.blockId, String(patch.value ?? ""));
+        }
+      }
+    }
+  }
+  return Array.from(blockValues.values()).join("\n\n").trim() || "No response from Perplexica";
+}
+
+/**
+ * Builds a fresh McpServer with every browser tool registered, plus
+ * `perplexica_search` when PERPLEXICA_URL is set.
+ */
+function buildMcpServer(): McpServer {
+  const server = new McpServer({ name: "agent-browser", version: "1.0.0" });
+
+  // Register Perplexica search only if configured
+  if (PERPLEXICA_URL) {
+    server.tool(
+      "perplexica_search",
+      "Search the web using Perplexica AI (gives cited answers).",
+      { query: z.string().describe("Search query") },
+      async ({ query }: { query: string }) => {
+        try {
+          const result = await perplexicaChat(query);
+          return { content: [{ type: "text", text: result }] };
+        } catch (e) {
+          return { content: [{ type: "text", text: `Perplexica error: ${String(e)}` }] };
+        }
+      }
+    );
+  }
+
+  server.tool(
+    "navigate",
+    "Navigate the browser to a URL. Returns the page title. SSRF-protected: rejects private/internal addresses.",
+    {
+      url: z.string().url().refine(isAllowedUrl, {
+        message: "URL must use http/https and must not point to private or loopback addresses.",
+      }).describe("Full public URL including https://"),
+    },
+    async ({ url }: { url: string }) => {
+      const data = await cmd<{ url: string; title: string }>({ action: "navigate", url });
+      return { content: [{ type: "text", text: `Navigated to: ${data.title} (${data.url})` }] };
+    }
+  );
+
+  server.tool(
+    "snapshot",
+    "Get an accessibility snapshot of the current page with @ref identifiers. Use refs with click/fill tools.",
+    {},
+    async () => {
+      const data = await cmd<{ snapshot: string; origin?: string }>({
+        action: "snapshot",
+        interactive: true,
+      });
+      return { content: [{ type: "text", text: data.snapshot }] };
+    }
+  );
+
+  server.tool(
+    "click",
+    "Click an element by @ref (from snapshot) or CSS selector.",
+    { ref: z.string().describe("@ref from snapshot (e.g. '@e1') or CSS selector") },
+    async ({ ref }: { ref: string }) => {
+      await cmd({ action: "click", selector: ref });
+      return { content: [{ type: "text", text: `Clicked ${ref}` }] };
+    }
+  );
+
+  server.tool(
+    "fill",
+    "Clear a form input and type a new value. Use @ref from snapshot or CSS selector.",
+    {
+      ref: z.string().describe("@ref from snapshot or CSS selector"),
+      value: z.string().describe("Value to enter"),
+    },
+    async ({ ref, value }: { ref: string; value: string }) => {
+      await cmd({ action: "fill", selector: ref, value });
+      return { content: [{ type: "text", text: `Filled ${ref} with "${value}"` }] };
+    }
+  );
+
+  server.tool(
+    "get_text",
+    "Get the text content of an element by CSS selector.",
+    { selector: z.string().describe("CSS selector") },
+    async ({ selector }: { selector: string }) => {
+      const data = await cmd<{ text: string; origin?: string }>({ action: "gettext", selector });
+      return { content: [{ type: "text", text: data.text.slice(0, 2000) }] };
+    }
+  );
+
+  server.tool(
+    "press_key",
+    "Press a keyboard key globally (e.g. Enter, Tab, Escape, ArrowDown).",
+    { key: z.string().describe("Key name e.g. Enter, Tab, ArrowDown") },
+    async ({ key }: { key: string }) => {
+      const b = await getBrowser();
+      await b.getPage().keyboard.press(key);
+      return { content: [{ type: "text", text: `Pressed ${key}` }] };
+    }
+  );
+
+  server.tool(
+    "screenshot",
+    "Take a screenshot of the current page.",
+    {},
+    async () => {
+      const b = await getBrowser();
+      const page = b.getPage();
+      // NOTE(review): the image is written to a fixed path on the server and only a
+      // text message is returned, so the caller never receives the picture itself.
+      await page.screenshot({ path: "/tmp/screenshot.png" });
+      return { content: [{ type: "text", text: "Screenshot taken (saved to /tmp/screenshot.png)" }] };
+    }
+  );
+
+  server.tool(
+    "get_url",
+    "Get the current browser URL.",
+    {},
+    async () => {
+      const data = await cmd<{ url: string }>({ action: "url" });
+      return { content: [{ type: "text", text: data.url }] };
+    }
+  );
+
+  server.tool(
+    "close_browser",
+    "Close the browser session and free resources.",
+    {},
+    async () => {
+      if (browser) {
+        const b = browser.getBrowser();
+        if (b) await b.close().catch(() => {});
+        browser = null;
+      }
+      return { content: [{ type: "text", text: "Browser closed" }] };
+    }
+  );
+
+  return server;
+}
+
+// CRITICAL: Do NOT add express.json() or any body-parsing middleware here.
+// SSEServerTransport.handlePostMessage() reads the raw request body as a Node.js readable
+// stream. If express.json() pre-consumes the stream, every MCP initialize handshake fails
+// with HTTP 400 "stream is not readable", silently preventing all tool execution.
+const app = express();
+// Live SSE transports keyed by session id so POSTs to /messages can be routed.
+const transports: Map<string, SSEServerTransport> = new Map();
+
+// Health probe; also reports the tool names this process registers.
+app.get("/health", (_req: Request, res: Response) => {
+  const tools = [
+    "navigate", "snapshot", "click", "fill", "get_text",
+    "press_key", "screenshot", "get_url", "close_browser",
+  ];
+  if (PERPLEXICA_URL) tools.unshift("perplexica_search");
+  res.json({ status: "ok", tools });
+});
+
+// Opens an SSE stream; each connection gets its own transport and McpServer.
+app.get("/sse", async (req: Request, res: Response) => {
+  const transport = new SSEServerTransport("/messages", res);
+  const id = transport.sessionId;
+  transports.set(id, transport);
+  // Attach cleanup before connecting so a client that disconnects while
+  // connect() is still pending is removed from the map as well.
+  res.on("close", () => transports.delete(id));
+  const server = buildMcpServer();
+  await server.connect(transport);
+});
+
+// Receives client-to-server MCP messages for the session named in ?sessionId=.
+app.post("/messages", async (req: Request, res: Response) => {
+  // Only a single string is a usable key; a missing or repeated parameter yields 404.
+  const id = req.query.sessionId;
+  const transport = typeof id === "string" ? transports.get(id) : undefined;
+  if (!transport) {
+    res.status(404).json({ error: "Session not found" });
+    return;
+  }
+  await transport.handlePostMessage(req, res);
+});
+
+app.listen(PORT, () => {
+  console.log(`agent-browser MCP server listening on port ${PORT}`);
+  if (PERPLEXICA_URL) console.log(`Perplexica integration enabled: ${PERPLEXICA_URL}`);
+  else console.log("Perplexica integration disabled (set PERPLEXICA_URL to enable)");
+});
diff --git a/packages/mcp-servers/agent-browser/tsconfig.json b/packages/mcp-servers/agent-browser/tsconfig.json
new file mode 100644
index 0000000000..4963332ff7
--- /dev/null
+++ b/packages/mcp-servers/agent-browser/tsconfig.json
@@ -0,0 +1,14 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "NodeNext",
+    "moduleResolution": "NodeNext",
+    "outDir": "dist",
+    "rootDir": "src",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "declaration": true
+  },
+  "include": ["src"]
+}