From 265d82dab7079c6dadda7dc0fff31618c8b6aef5 Mon Sep 17 00:00:00 2001 From: Will Wilson Date: Mon, 9 Mar 2026 00:58:33 +0000 Subject: [PATCH] fix: remove accidentally included agent-browser docs The docs/docs/configuration/tools/agent-browser.mdx file was unintentionally included in this PR (merged from a separate branch). This PR is only for TCP health checks on database services. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../configuration/tools/agent-browser.mdx | 205 ------------------ 1 file changed, 205 deletions(-) delete mode 100644 docs/docs/configuration/tools/agent-browser.mdx diff --git a/docs/docs/configuration/tools/agent-browser.mdx b/docs/docs/configuration/tools/agent-browser.mdx deleted file mode 100644 index a09b64576c..0000000000 --- a/docs/docs/configuration/tools/agent-browser.mdx +++ /dev/null @@ -1,205 +0,0 @@ ---- -title: Agent Browser MCP -description: Browser automation via MCP using Vercel's agent-browser library (Playwright + @ref accessibility snapshots) ---- - -import { Steps, Callout, Tabs } from 'nextra/components' - -# Agent Browser MCP Server - -The agent-browser MCP server provides AI-optimised browser automation for LibreChat agents, powered by [Vercel's `agent-browser` library](https://www.npmjs.com/package/agent-browser) which uses Playwright with accessibility tree snapshots. - -## Why agent-browser instead of raw Playwright/Puppeteer? - -Raw Playwright and Puppeteer expose CSS selectors and XPath expressions to the model. These are brittle in single-page applications, break when a site redeploys, and require the model to infer element identity from unstructured HTML. - -`agent-browser` solves this by producing **accessibility tree snapshots** with stable `@ref` identifiers: - -``` -button [@e3] "Sign in" -input [@e7] placeholder="Email address" -``` - -Every interactive element gets a unique `@e1`, `@e2`, `@e3`… reference that the model can pass directly to `click` or `fill`. This lets the LLM: - -- Reference elements precisely without fragile CSS selectors -- Navigate complex SPAs without XPath hacks -- Interact reliably with dynamically rendered content - -## Tools provided - -| Tool | Description | -|------|-------------| -| `navigate` | Navigate to a URL; returns the page title | -| `snapshot` | Get the accessibility tree with `@ref` identifiers for all interactive elements | -| `click` | Click an element by `@ref` (from snapshot) or CSS selector | -| `fill` | Clear and type into an input field by `@ref` or CSS selector | -| `get_text` | Extract text content from an element by CSS selector | -| `press_key` | Press a keyboard key (Enter, Tab, Escape, ArrowDown, etc.) | -| `screenshot` | Take a screenshot of the current page (returns base64 PNG) | -| `get_url` | Get the current browser URL | -| `close_browser` | Close the browser session and free all resources | - -## Setup - -### Prerequisites - -- Docker Compose (recommended) **or** Node.js ≥ 20 + Playwright system dependencies -- LibreChat configured with `mcpServers` in `librechat.yaml` - - - -### Run the MCP server - - - -Add to your `docker-compose.override.yml`: - -```yaml -services: - agent-browser-mcp: - build: - context: ./packages/mcp-servers/agent-browser - environment: - - PORT=8932 - # Optional: path to a specific Chromium binary - # - CHROMIUM_PATH=/usr/bin/chromium - ports: - - "8932:8932" - restart: unless-stopped -``` - - -```bash -# Clone LibreChat -git clone https://github.com/danny-avila/LibreChat -cd LibreChat/packages/mcp-servers/agent-browser - -npm install -npx playwright install chromium --with-deps - -npm run build -npm start -``` - -The server listens on `http://localhost:8932` by default. Set `PORT` to override. - - - -### Configure librechat.yaml - -Add the server to `mcpServers` in your `librechat.yaml`: - -```yaml -mcpServers: - agent-browser: - type: sse - url: http://agent-browser-mcp:8932/sse - # Adjust the URL for local/non-Docker setups: - # url: http://localhost:8932/sse - autoApprove: - - navigate - - snapshot - - click - - fill - - get_text - - press_key - - screenshot - - get_url - - close_browser -``` - - - -## Environment variables - -| Variable | Default | Description | -|----------|---------|-------------| -| `PORT` | `8932` | HTTP port the MCP server listens on | -| `CHROMIUM_PATH` | _(Playwright managed)_ | Path to a custom Chromium binary | - -## Implementation reference - -If you are building your own MCP SSE server or extending this one, the following pattern is critical. - -### Critical: Do not add `express.json()` middleware - -The MCP `SSEServerTransport.handlePostMessage` reads the raw request stream internally. Adding `express.json()` upstream of the POST `/messages` route causes Express to consume the stream before the SDK can read it, producing **HTTP 400 "stream is not readable"** on every `initialize` call and preventing all tool execution. - -```typescript -import express from "express"; -import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; -import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js"; - -// CORRECT: no express.json() anywhere on this app -const app = express(); -const transports = new Map(); - -app.get("/sse", async (req, res) => { - const transport = new SSEServerTransport("/messages", res); - transports.set(transport.sessionId, transport); - const server = buildMcpServer(); // creates McpServer with all tools - await server.connect(transport); - res.on("close", () => transports.delete(transport.sessionId)); -}); - -app.post("/messages", async (req, res) => { - const transport = transports.get(req.query.sessionId as string); - if (!transport) { - res.status(404).json({ error: "Session not found" }); - return; - } - await transport.handlePostMessage(req, res); -}); -``` - -### Session management - -Each LibreChat client connection creates its own `SSEServerTransport` instance on `GET /sse`. The transport's `sessionId` (a UUID generated by the SDK) is appended to the client's POST `/messages` requests as `?sessionId=…`, routing each message back to the correct server-sent events connection. - -### Tool registration pattern - -Tools are registered using the `McpServer` fluent API with [Zod](https://zod.dev) schemas for parameter validation: - -```typescript -import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; -import { z } from "zod"; - -function buildMcpServer(): McpServer { - const server = new McpServer({ name: "agent-browser", version: "1.0.0" }); - - server.tool( - "navigate", - "Navigate the browser to a URL. Returns the page title.", - { url: z.string().describe("Full URL including https://") }, - async ({ url }) => { - // ... call agent-browser BrowserManager - return { content: [{ type: "text", text: `Navigated to: ${title}` }] }; - } - ); - - // Register remaining tools... - return server; -} -``` - -## Typical agent workflow - -``` -1. navigate → https://example.com -2. snapshot → gets accessibility tree with @e1, @e2, @e3 refs -3. fill → @e7 "search query" -4. press_key → Enter -5. snapshot → inspect updated page -6. get_text → .result-list (extract results) -``` - - - Call `close_browser` when the task is finished to free Playwright resources. The browser session is shared across tool calls within a single server process, so leaving it open between tasks is intentional but consumes memory. - - -## Related - -- [MCP Server configuration reference](/docs/configuration/librechat_yaml/object_structure/mcp_servers) -- [Vercel `agent-browser` npm package](https://www.npmjs.com/package/agent-browser) -- [Model Context Protocol SDK](https://github.com/modelcontextprotocol/typescript-sdk)