mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-04-07 00:15:23 +02:00
Merge 0dc05d9e77 into 8ed0bcf5ca
This commit is contained in:
commit
dbe07c9fb2
7 changed files with 633 additions and 0 deletions
219
docs/docs/configuration/tools/agent-browser.mdx
Normal file
219
docs/docs/configuration/tools/agent-browser.mdx
Normal file
|
|
@ -0,0 +1,219 @@
|
|||
---
|
||||
title: Agent Browser MCP
|
||||
description: Browser automation via MCP using Vercel's agent-browser library (Playwright + @ref accessibility snapshots)
|
||||
---
|
||||
|
||||
import { Steps, Callout, Tabs } from 'nextra/components'
|
||||
|
||||
# Agent Browser MCP Server
|
||||
|
||||
The agent-browser MCP server provides AI-optimised browser automation for LibreChat agents, powered by [Vercel's `agent-browser` library](https://www.npmjs.com/package/agent-browser) which uses Playwright with accessibility tree snapshots.
|
||||
|
||||
## Why agent-browser instead of raw Playwright/Puppeteer?
|
||||
|
||||
Raw Playwright and Puppeteer expose CSS selectors and XPath expressions to the model. These are brittle in single-page applications, break when a site redeploys, and require the model to infer element identity from unstructured HTML.
|
||||
|
||||
`agent-browser` solves this by producing **accessibility tree snapshots** with stable `@ref` identifiers:
|
||||
|
||||
```
|
||||
button [@e3] "Sign in"
|
||||
input [@e7] placeholder="Email address"
|
||||
```
|
||||
|
||||
Every interactive element gets a unique `@e1`, `@e2`, `@e3`… reference that the model can pass directly to `click` or `fill`. This lets the LLM:
|
||||
|
||||
- Reference elements precisely without fragile CSS selectors
|
||||
- Navigate complex SPAs without XPath hacks
|
||||
- Interact reliably with dynamically rendered content
|
||||
|
||||
## Tools provided
|
||||
|
||||
| Tool | Description |
|
||||
|------|-------------|
|
||||
| `navigate` | Navigate to a URL; returns the page title |
|
||||
| `snapshot` | Get the accessibility tree with `@ref` identifiers for all interactive elements |
|
||||
| `click` | Click an element by `@ref` (from snapshot) or CSS selector |
|
||||
| `fill` | Clear and type into an input field by `@ref` or CSS selector |
|
||||
| `get_text` | Extract text content from an element by CSS selector (output is truncated to 2,000 characters) |
|
||||
| `press_key` | Press a keyboard key (Enter, Tab, Escape, ArrowDown, etc.) |
|
||||
| `screenshot` | Take a screenshot of the current page (returns base64 PNG) |
|
||||
| `get_url` | Get the current browser URL |
|
||||
| `close_browser` | Close the browser session and free all resources |
|
||||
|
||||
## Setup
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- Docker Compose (recommended) **or** Node.js ≥ 20 + Playwright system dependencies
|
||||
- LibreChat configured with `mcpServers` in `librechat.yaml`
|
||||
|
||||
<Steps>
|
||||
|
||||
### Run the MCP server
|
||||
|
||||
<Tabs items={['Docker Compose', 'Build from source']}>
|
||||
<Tabs.Tab>
|
||||
Add to your `docker-compose.override.yaml`:
|
||||
|
||||
```yaml
|
||||
services:
|
||||
agent-browser-mcp:
|
||||
build:
|
||||
context: ./packages/mcp-servers/agent-browser
|
||||
environment:
|
||||
- PORT=8932
|
||||
# Optional: path to a specific Chromium binary
|
||||
# - CHROMIUM_PATH=/usr/bin/chromium
|
||||
# Internal Docker network only — do not expose publicly without auth
|
||||
# For local dev, uncomment: ports: ["127.0.0.1:8932:8932"]
|
||||
restart: unless-stopped
|
||||
```
|
||||
</Tabs.Tab>
|
||||
<Tabs.Tab>
|
||||
```bash
|
||||
# Clone LibreChat
|
||||
git clone https://github.com/danny-avila/LibreChat
|
||||
cd LibreChat/packages/mcp-servers/agent-browser
|
||||
|
||||
npm install
|
||||
npx playwright install chromium --with-deps
|
||||
|
||||
npm run build
|
||||
npm start
|
||||
```
|
||||
|
||||
The server listens on `http://localhost:8932` by default. Set `PORT` to override.
|
||||
</Tabs.Tab>
|
||||
</Tabs>
|
||||
|
||||
### Configure librechat.yaml
|
||||
|
||||
Add the server to your `librechat.yaml`:
|
||||
|
||||
```yaml
|
||||
# Allow the MCP client to reach the agent-browser server
|
||||
mcpSettings:
|
||||
allowedDomains:
|
||||
- http://agent-browser-mcp:8932
|
||||
- http://localhost:8932
|
||||
|
||||
mcpServers:
|
||||
agent-browser:
|
||||
type: sse
|
||||
url: http://agent-browser-mcp:8932/sse
|
||||
# For local/non-Docker setups:
|
||||
# url: http://localhost:8932/sse
|
||||
```
|
||||
|
||||
</Steps>
|
||||
|
||||
## Environment variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `PORT` | `8932` | HTTP port the MCP server listens on |
|
||||
| `CHROMIUM_PATH` | _(Playwright managed)_ | Path to a custom Chromium binary |
|
||||
|
||||
### Security
|
||||
|
||||
<Callout type="warning">
|
||||
**SSRF protection:** The `navigate` tool validates URLs and rejects requests to private
|
||||
IP ranges (10.x, 192.168.x, 172.16-31.x, 127.x, 169.254.x) and internal hostnames
|
||||
(localhost, .local, .internal). For internal/homelab use, fork the server and adjust
|
||||
the `isAllowedUrl()` function in `src/server.ts`.
|
||||
</Callout>
|
||||
|
||||
## Implementation reference
|
||||
|
||||
If you are building your own MCP SSE server or extending this one, the following pattern is critical.
|
||||
|
||||
### Critical: Do not add `express.json()` middleware
|
||||
|
||||
The MCP `SSEServerTransport.handlePostMessage` reads the raw request stream internally. Adding `express.json()` upstream of the POST `/messages` route causes Express to consume the stream before the SDK can read it, producing **HTTP 400 "stream is not readable"** on every `initialize` call and preventing all tool execution.
|
||||
|
||||
```typescript
|
||||
import express from "express";
|
||||
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
||||
import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js";
|
||||
|
||||
// CORRECT: no express.json() anywhere on this app
|
||||
const app = express();
|
||||
const transports = new Map<string, SSEServerTransport>();
|
||||
|
||||
app.get("/sse", async (req, res) => {
|
||||
const transport = new SSEServerTransport("/messages", res);
|
||||
transports.set(transport.sessionId, transport);
|
||||
const server = buildMcpServer(); // creates McpServer with all tools
|
||||
await server.connect(transport);
|
||||
res.on("close", () => transports.delete(transport.sessionId));
|
||||
});
|
||||
|
||||
app.post("/messages", async (req, res) => {
|
||||
const transport = transports.get(req.query.sessionId as string);
|
||||
if (!transport) {
|
||||
res.status(404).json({ error: "Session not found" });
|
||||
return;
|
||||
}
|
||||
await transport.handlePostMessage(req, res);
|
||||
});
|
||||
```
|
||||
|
||||
### Session management
|
||||
|
||||
Each LibreChat client connection creates its own `SSEServerTransport` instance on `GET /sse`. The transport's `sessionId` (a UUID generated by the SDK) is appended to the client's POST `/messages` requests as `?sessionId=…`, routing each message back to the correct server-sent events connection.
|
||||
|
||||
### Tool registration pattern
|
||||
|
||||
Tools are registered using the `McpServer` fluent API with [Zod](https://zod.dev) schemas for parameter validation:
|
||||
|
||||
```typescript
|
||||
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
||||
import { z } from "zod";
|
||||
|
||||
function buildMcpServer(): McpServer {
|
||||
const server = new McpServer({ name: "agent-browser", version: "1.0.0" });
|
||||
|
||||
server.tool(
|
||||
"navigate",
|
||||
"Navigate the browser to a URL. Returns the page title.",
|
||||
{
|
||||
url: z
|
||||
.string()
|
||||
.url()
|
||||
.refine(isAllowedUrl, {
|
||||
message:
|
||||
"URL must use http/https and must not point to private, loopback, or link-local addresses.",
|
||||
})
|
||||
.describe("Full URL including https://"),
|
||||
},
|
||||
async ({ url }) => {
|
||||
// ... call agent-browser BrowserManager with a validated, external URL
|
||||
return { content: [{ type: "text", text: `Navigated to: ${title}` }] };
|
||||
}
|
||||
);
|
||||
|
||||
// Register remaining tools...
|
||||
return server;
|
||||
}
|
||||
```
|
||||
|
||||
## Typical agent workflow
|
||||
|
||||
```
|
||||
1. navigate → https://example.com
|
||||
2. snapshot → gets accessibility tree with @e1, @e2, @e3 refs
|
||||
3. fill → @e7 "search query"
|
||||
4. press_key → Enter
|
||||
5. snapshot → inspect updated page
|
||||
6. get_text → .result-list (extract results)
|
||||
```
|
||||
|
||||
<Callout type="info">
|
||||
Call `close_browser` when the task is finished to free Playwright resources. A single browser session is shared by every tool call — and by every connected client — within one server process, so it intentionally stays open between tasks, but it keeps consuming memory until it is closed.
|
||||
</Callout>
|
||||
|
||||
## Related
|
||||
|
||||
- [MCP Server configuration](https://www.librechat.ai/docs/configuration/librechat_yaml/object_structure/mcp_servers)
|
||||
- [Vercel `agent-browser` npm package](https://www.npmjs.com/package/agent-browser)
|
||||
- [Model Context Protocol SDK](https://github.com/modelcontextprotocol/typescript-sdk)
|
||||
9
packages/mcp-servers/agent-browser/.env.example
Normal file
9
packages/mcp-servers/agent-browser/.env.example
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
PORT=8932
|
||||
CHROMIUM_PATH=/usr/bin/chromium
|
||||
|
||||
# Optional: Perplexica web search integration
|
||||
# PERPLEXICA_URL=http://perplexica:3001
|
||||
# PERPLEXICA_CHAT_PROVIDER=openai
|
||||
# PERPLEXICA_CHAT_MODEL=gpt-4
|
||||
# PERPLEXICA_EMBED_PROVIDER=ollama-embeddings
|
||||
# PERPLEXICA_EMBED_MODEL=nomic-embed-text:latest
|
||||
42
packages/mcp-servers/agent-browser/Dockerfile
Normal file
42
packages/mcp-servers/agent-browser/Dockerfile
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
# ---- Build stage: install dependencies and compile TypeScript into dist/ ----
FROM node:22-slim AS builder

WORKDIR /app
COPY package.json tsconfig.json ./
RUN npm install
COPY src/ src/
RUN npm run build
# The runtime stage copies node_modules from here; drop devDependencies
# (typescript, tsx, @types/*) first so they are not shipped in the final image.
RUN npm prune --omit=dev

# ---- Runtime stage ----
FROM node:22-slim

# Install Chromium dependencies for Playwright
RUN apt-get update && apt-get install -y --no-install-recommends \
    chromium \
    fonts-liberation \
    libasound2 \
    libatk-bridge2.0-0 \
    libatk1.0-0 \
    libcups2 \
    libdbus-1-3 \
    libdrm2 \
    libgbm1 \
    libgtk-3-0 \
    libnspr4 \
    libnss3 \
    libx11-xcb1 \
    libxcomposite1 \
    libxdamage1 \
    libxrandr2 \
    xdg-utils \
    && rm -rf /var/lib/apt/lists/*

# Run the server as an unprivileged system user
RUN groupadd -r appuser && useradd -r -g appuser -d /app appuser
WORKDIR /app
COPY --from=builder /app/dist dist/
COPY --from=builder /app/node_modules node_modules/
COPY package.json ./

# Use the distribution's Chromium installed above
ENV CHROMIUM_PATH=/usr/bin/chromium
USER appuser
EXPOSE 8932

CMD ["node", "dist/server.js"]
|
||||
42
packages/mcp-servers/agent-browser/README.md
Normal file
42
packages/mcp-servers/agent-browser/README.md
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
# @librechat/mcp-agent-browser
|
||||
|
||||
Vercel [agent-browser](https://github.com/vercel-labs/agent-browser) wrapped as an MCP SSE server for LibreChat.
|
||||
|
||||
Uses Playwright with AI-optimised accessibility tree `@ref` snapshots — significantly better than raw CSS selectors for LLM-driven browser automation.
|
||||
|
||||
## Tools
|
||||
|
||||
| Tool | Description |
|
||||
| --- | --- |
|
||||
| `navigate` | Navigate to a URL (SSRF-protected) |
|
||||
| `snapshot` | Get accessibility snapshot with `@ref` identifiers |
|
||||
| `click` | Click element by `@ref` or CSS selector |
|
||||
| `fill` | Fill form input by `@ref` or CSS selector |
|
||||
| `get_text` | Get text content of an element |
|
||||
| `press_key` | Press a keyboard key |
|
||||
| `screenshot` | Take page screenshot |
|
||||
| `get_url` | Get current URL |
|
||||
| `close_browser` | Close browser session |
|
||||
| `perplexica_search` | *(Optional)* Web search via Perplexica |
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
docker build -t agent-browser-mcp .
|
||||
docker run -p 8932:8932 agent-browser-mcp
|
||||
```
|
||||
|
||||
## LibreChat Configuration
|
||||
|
||||
```yaml
|
||||
mcpServers:
|
||||
agent-browser:
|
||||
type: sse
|
||||
url: http://agent-browser-mcp:8932/sse
|
||||
```
|
||||
|
||||
## Security
|
||||
|
||||
- **SSRF protection**: The `navigate` tool rejects private, loopback, and link-local IPs (10.x, 192.168.x, 172.16-31.x, 127.x, 169.254.x) and internal hostnames (localhost, .local, .internal).
|
||||
- Runs as non-root `appuser` in Docker.
|
||||
- No `express.json()` middleware — see source comments for explanation.
|
||||
24
packages/mcp-servers/agent-browser/package.json
Normal file
24
packages/mcp-servers/agent-browser/package.json
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
{
|
||||
"name": "@librechat/mcp-agent-browser",
|
||||
"version": "1.0.0",
|
||||
"description": "Vercel agent-browser MCP SSE server for LibreChat — Playwright-based browser automation with AI-optimised @ref snapshots",
|
||||
"type": "module",
|
||||
"main": "dist/server.js",
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"start": "node dist/server.js",
|
||||
"dev": "tsx src/server.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"@modelcontextprotocol/sdk": "^1.0.0",
|
||||
"agent-browser": "^0.16.0",
|
||||
"express": "^4.21.0",
|
||||
"zod": "^3.23.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/express": "^4.17.21",
|
||||
"@types/node": "^22.0.0",
|
||||
"tsx": "^4.19.0",
|
||||
"typescript": "^5.6.0"
|
||||
}
|
||||
}
|
||||
283
packages/mcp-servers/agent-browser/src/server.ts
Normal file
283
packages/mcp-servers/agent-browser/src/server.ts
Normal file
|
|
@ -0,0 +1,283 @@
|
|||
import express, { Request, Response } from "express";
|
||||
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
||||
import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js";
|
||||
import { z } from "zod";
|
||||
import { BrowserManager } from "agent-browser/dist/browser.js";
|
||||
import { executeCommand } from "agent-browser/dist/actions.js";
|
||||
|
||||
/**
 * Parses a TCP port from an environment value.
 *
 * @param raw - The raw environment string (may be undefined or empty).
 * @param fallback - Port to use when `raw` is missing or not a valid port.
 * @returns The parsed port, or `fallback` when `raw` is not an integer in 0-65535.
 */
const parsePort = (raw: string | undefined, fallback: number): number => {
  if (raw === undefined || raw.trim() === "") return fallback;
  // Explicit radix: never let a prefix change how the value is interpreted.
  const value = Number.parseInt(raw, 10);
  if (Number.isInteger(value) && value >= 0 && value <= 65535) return value;
  console.warn(`Invalid PORT value "${raw}", falling back to ${fallback}`);
  return fallback;
};

// HTTP port the server listens on (default 8932).
const PORT = parsePort(process.env.PORT, 8932);
// Optional path to a Chromium binary; empty string means "not set".
const CHROMIUM_PATH = process.env.CHROMIUM_PATH ?? "";

// Optional Perplexica integration — only enabled if PERPLEXICA_URL is set
const PERPLEXICA_URL = process.env.PERPLEXICA_URL ?? "";
const PERPLEXICA_CHAT_PROVIDER = process.env.PERPLEXICA_CHAT_PROVIDER ?? "";
const PERPLEXICA_CHAT_MODEL = process.env.PERPLEXICA_CHAT_MODEL ?? "";
const PERPLEXICA_EMBED_PROVIDER = process.env.PERPLEXICA_EMBED_PROVIDER ?? "";
const PERPLEXICA_EMBED_MODEL = process.env.PERPLEXICA_EMBED_MODEL ?? "";
|
||||
|
||||
// Single browser manager shared by every tool call in this process (null until first use).
let browser: BrowserManager | null = null;
// Monotonic counter backing the command ids handed to executeCommand.
let cmdId = 0;
const nextId = () => `c${++cmdId}`;

/**
 * Returns the shared BrowserManager, launching a headless browser first
 * when none is currently launched.
 *
 * @returns The module-level browser manager.
 * @throws Error when the launch command reports failure.
 */
async function getBrowser(): Promise<BrowserManager> {
  // Fast path: an already-launched browser is reused as-is.
  if (browser?.isLaunched()) {
    return browser;
  }

  browser = new BrowserManager();
  const launchRequest: Record<string, unknown> = {
    id: nextId(),
    action: "launch",
    headless: true,
  };
  // Only override the executable when CHROMIUM_PATH was provided.
  if (CHROMIUM_PATH) {
    launchRequest.executablePath = CHROMIUM_PATH;
  }

  const outcome = await executeCommand(launchRequest as any, browser);
  if (!outcome.success) {
    throw new Error(`Browser launch failed: ${(outcome as any).error}`);
  }
  return browser;
}
|
||||
|
||||
/**
 * Runs one agent-browser command against the shared browser and unwraps its payload.
 *
 * @param command - Command fields (e.g. `action` plus its arguments); an `id` is
 *   prepended, and an `id` key in `command` would override it.
 * @returns The `data` field of a successful response, typed as `T` without validation.
 * @throws Error carrying the response's `error`, or "Command failed" when none is given.
 */
async function cmd<T = unknown>(command: Record<string, unknown>): Promise<T> {
  const manager = await getBrowser();
  const request = { id: nextId(), ...command };
  const response = await executeCommand(request as any, manager);

  if (response.success) {
    return (response as any).data as T;
  }
  throw new Error((response as any).error ?? "Command failed");
}
|
||||
|
||||
// --- SSRF Protection ---
|
||||
/**
 * Reports whether a hostname is a well-known internal/loopback name.
 *
 * Matches `localhost` (and any `*.localhost` name), the `ip6-localhost` /
 * `ip6-loopback` aliases, and names ending in `.local` or `.internal`.
 * Comparison is case-insensitive and ignores trailing root dots, so the
 * fully-qualified form `localhost.` cannot be used to slip past the check.
 *
 * @param hostname - Hostname as returned by `URL.hostname`.
 * @returns `true` when the name should be treated as internal.
 */
const isPrivateHostname = (hostname: string): boolean => {
  // "localhost." is the same host as "localhost"; normalise before matching.
  const lower = hostname.toLowerCase().replace(/\.+$/, "");
  return (
    lower === "localhost" ||
    lower === "ip6-localhost" ||
    lower === "ip6-loopback" ||
    lower.endsWith(".localhost") ||
    lower.endsWith(".local") ||
    lower.endsWith(".internal")
  );
};
|
||||
|
||||
/** Classifies dotted-quad octets as "this network", private, loopback or link-local. */
const isPrivateIpv4Octets = (octets: number[]): boolean => {
  const [a, b] = octets;
  if (a === 0) return true; // 0.0.0.0/8 — "this host"
  if (a === 10) return true;
  if (a === 127) return true;
  if (a === 169 && b === 254) return true;
  if (a === 172 && b >= 16 && b <= 31) return true;
  if (a === 192 && b === 168) return true;
  return false;
};

/** Expands an IPv6 literal (no brackets) into its eight 16-bit groups, or null if malformed. */
const parseIpv6Groups = (address: string): number[] | null => {
  let text = address;
  // A trailing dotted quad (e.g. ::ffff:10.0.0.1) is rewritten as two hex groups.
  const lastColon = text.lastIndexOf(":");
  const tail = text.slice(lastColon + 1);
  if (tail.includes(".")) {
    if (!/^\d{1,3}(\.\d{1,3}){3}$/.test(tail)) return null;
    const o = tail.split(".").map(Number);
    if (o.some((n) => n > 255)) return null;
    const hi = ((o[0] << 8) | o[1]).toString(16);
    const lo = ((o[2] << 8) | o[3]).toString(16);
    text = `${text.slice(0, lastColon + 1)}${hi}:${lo}`;
  }

  const halves = text.split("::");
  if (halves.length > 2) return null;
  const compressed = halves.length === 2;
  const toGroups = (part: string): string[] => (part === "" ? [] : part.split(":"));
  const head = toGroups(halves[0]);
  const rest = compressed ? toGroups(halves[1]) : [];
  const missing = 8 - head.length - rest.length;
  if (compressed ? missing < 1 : missing !== 0) return null;

  const padding: string[] = compressed ? new Array<string>(missing).fill("0") : [];
  const all = [...head, ...padding, ...rest];
  if (!all.every((g) => /^[0-9a-f]{1,4}$/.test(g))) return null;
  return all.map((g) => Number.parseInt(g, 16));
};

/**
 * Reports whether a hostname is an IP literal in a private, loopback,
 * link-local or unspecified range.
 *
 * IPv4: 0.0.0.0/8, 10/8, 127/8, 169.254/16, 172.16/12, 192.168/16.
 * IPv6: `::`, `::1`, fc00::/7 (unique local), fe80::/10 (link-local) and
 * IPv4-mapped addresses (`::ffff:a.b.c.d`) whose embedded IPv4 is in a range above.
 *
 * `URL.hostname` keeps the square brackets around IPv6 literals (`[::1]`),
 * so brackets are stripped before classification.
 *
 * @param hostname - Hostname as returned by `URL.hostname`, or a bare IP literal.
 * @returns `true` for blocked IP literals; `false` for public IPs and non-IP names.
 */
const isPrivateIp = (hostname: string): boolean => {
  const host = hostname
    .toLowerCase()
    .replace(/^\[|\]$/g, "")
    .replace(/\.$/, "");

  if (/^\d{1,3}(\.\d{1,3}){3}$/.test(host)) {
    return isPrivateIpv4Octets(host.split(".").map(Number));
  }
  if (!host.includes(":")) return false; // not an IP literal at all

  const groups = parseIpv6Groups(host);
  if (groups === null) return false;

  const first = groups[0];
  const upperSevenZero = groups.slice(0, 7).every((g) => g === 0);
  if (upperSevenZero && (groups[7] === 0 || groups[7] === 1)) return true; // :: and ::1
  if (groups.slice(0, 5).every((g) => g === 0) && groups[5] === 0xffff) {
    // IPv4-mapped IPv6: judge by the embedded IPv4 address.
    return isPrivateIpv4Octets([
      groups[6] >> 8,
      groups[6] & 0xff,
      groups[7] >> 8,
      groups[7] & 0xff,
    ]);
  }
  if ((first & 0xfe00) === 0xfc00) return true; // fc00::/7 unique local
  if ((first & 0xffc0) === 0xfe80) return true; // fe80::/10 link-local
  return false;
};
|
||||
|
||||
/**
 * Decides whether a URL may be handed to the browser.
 *
 * A URL is allowed only when it parses, uses the http or https scheme, and its
 * hostname is rejected by neither `isPrivateHostname` nor `isPrivateIp`.
 *
 * @param value - Candidate URL string.
 * @returns `true` when allowed; `false` otherwise, including when parsing fails.
 */
const isAllowedUrl = (value: string): boolean => {
  try {
    const { protocol, hostname } = new URL(value);
    const isWebScheme = protocol === "http:" || protocol === "https:";
    if (!isWebScheme) {
      return false;
    }
    if (isPrivateHostname(hostname)) {
      return false;
    }
    return !isPrivateIp(hostname);
  } catch {
    // Unparseable input is never allowed.
    return false;
  }
};
|
||||
|
||||
// --- Optional Perplexica search ---
|
||||
/**
 * Folds a newline-delimited JSON response body into a single answer string.
 *
 * Each non-empty line is parsed as JSON; lines that are not valid JSON, or
 * that parse to something other than an object, are skipped. For `updateBlock`
 * events, every `replace` patch on `/data` overwrites the stored text for that
 * event's `blockId`, so only the latest value per block is kept.
 *
 * @param rawText - Full response body.
 * @returns Block texts joined by blank lines, or "No response from Perplexica" when empty.
 * @throws Error when an event of type `error` is encountered.
 */
function collectPerplexicaAnswer(rawText: string): string {
  const blockValues = new Map<string, string>();

  for (const line of rawText.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed) continue;

    // The event shape is external and unvalidated, hence `any`; every access below is guarded.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    let event: any;
    try {
      event = JSON.parse(trimmed);
    } catch {
      continue; // tolerate non-JSON lines
    }
    // JSON.parse can yield null or a primitive; property access on null would throw.
    if (event === null || typeof event !== "object") continue;

    if (event.type === "error") throw new Error(event.data ?? "Perplexica error");
    if (event.type !== "updateBlock" || !Array.isArray(event.patch)) continue;

    for (const patch of event.patch) {
      if (patch?.op === "replace" && patch.path === "/data") {
        blockValues.set(event.blockId, String(patch.value ?? ""));
      }
    }
  }

  return Array.from(blockValues.values()).join("\n\n").trim() || "No response from Perplexica";
}

/**
 * Sends a single-turn web search query to Perplexica and returns the answer text.
 *
 * @param query - The user's search query.
 * @returns The answer text, or "No response from Perplexica" when no block text arrived.
 * @throws Error when PERPLEXICA_URL is unset, the HTTP response is not OK,
 *   or the response contains an `error` event.
 */
async function perplexicaChat(query: string): Promise<string> {
  if (!PERPLEXICA_URL) throw new Error("Perplexica not configured");

  const messageId = `msg-${Date.now()}`;
  const chatId = `chat-${Date.now()}`;
  const body = {
    message: { messageId, chatId, role: "user", content: query },
    chatModel: { providerId: PERPLEXICA_CHAT_PROVIDER, key: PERPLEXICA_CHAT_MODEL },
    embeddingModel: { providerId: PERPLEXICA_EMBED_PROVIDER, key: PERPLEXICA_EMBED_MODEL },
    sources: ["web"],
    optimizationMode: "speed",
    history: [],
  };

  // Strip trailing slashes so a configured "http://host:3001/" does not become "//api/chat".
  const endpoint = `${PERPLEXICA_URL.replace(/\/+$/, "")}/api/chat`;
  const resp = await fetch(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(body),
  });
  if (!resp.ok) throw new Error(`Perplexica ${resp.status}: ${await resp.text()}`);

  return collectPerplexicaAnswer(await resp.text());
}
|
||||
|
||||
/**
 * Creates an McpServer with every browser tool registered.
 *
 * Tools: `navigate`, `snapshot`, `click`, `fill`, `get_text`, `press_key`,
 * `screenshot`, `get_url`, `close_browser`, plus `perplexica_search` when
 * PERPLEXICA_URL is set. All tools operate on the single module-level browser.
 *
 * NOTE(review): URL validation is applied only to the `navigate` argument.
 * Nothing here re-validates redirects or navigations triggered by `click`.
 *
 * @returns A new, not-yet-connected McpServer instance.
 */
function buildMcpServer(): McpServer {
  const server = new McpServer({ name: "agent-browser", version: "1.0.0" });

  // Register Perplexica search only if configured
  if (PERPLEXICA_URL) {
    server.tool(
      "perplexica_search",
      "Search the web using Perplexica AI (gives cited answers).",
      { query: z.string().describe("Search query") },
      async ({ query }: { query: string }) => {
        try {
          const result = await perplexicaChat(query);
          return { content: [{ type: "text", text: result }] };
        } catch (e) {
          // Flag the result as an error so the client does not treat it as a search answer.
          return {
            content: [{ type: "text", text: `Perplexica error: ${String(e)}` }],
            isError: true,
          };
        }
      }
    );
  }

  server.tool(
    "navigate",
    "Navigate the browser to a URL. Returns the page title. SSRF-protected: rejects private/internal addresses.",
    {
      url: z
        .string()
        .url()
        .refine(isAllowedUrl, {
          message: "URL must use http/https and must not point to private or loopback addresses.",
        })
        .describe("Full public URL including https://"),
    },
    async ({ url }: { url: string }) => {
      const data = await cmd<{ url: string; title: string }>({ action: "navigate", url });
      return { content: [{ type: "text", text: `Navigated to: ${data.title} (${data.url})` }] };
    }
  );

  server.tool(
    "snapshot",
    "Get an accessibility snapshot of the current page with @ref identifiers. Use refs with click/fill tools.",
    {},
    async () => {
      const data = await cmd<{ snapshot: string; origin?: string }>({
        action: "snapshot",
        interactive: true,
      });
      return { content: [{ type: "text", text: data.snapshot }] };
    }
  );

  server.tool(
    "click",
    "Click an element by @ref (from snapshot) or CSS selector.",
    { ref: z.string().describe("@ref from snapshot (e.g. '@e1') or CSS selector") },
    async ({ ref }: { ref: string }) => {
      await cmd({ action: "click", selector: ref });
      return { content: [{ type: "text", text: `Clicked ${ref}` }] };
    }
  );

  server.tool(
    "fill",
    "Clear a form input and type a new value. Use @ref from snapshot or CSS selector.",
    {
      ref: z.string().describe("@ref from snapshot or CSS selector"),
      value: z.string().describe("Value to enter"),
    },
    async ({ ref, value }: { ref: string; value: string }) => {
      await cmd({ action: "fill", selector: ref, value });
      return { content: [{ type: "text", text: `Filled ${ref} with "${value}"` }] };
    }
  );

  server.tool(
    "get_text",
    "Get the text content of an element by CSS selector.",
    { selector: z.string().describe("CSS selector") },
    async ({ selector }: { selector: string }) => {
      const data = await cmd<{ text?: string | null; origin?: string }>({
        action: "gettext",
        selector,
      });
      // Cap the payload at 2000 characters; tolerate a missing text field.
      return { content: [{ type: "text", text: (data?.text ?? "").slice(0, 2000) }] };
    }
  );

  server.tool(
    "press_key",
    "Press a keyboard key globally (e.g. Enter, Tab, Escape, ArrowDown).",
    { key: z.string().describe("Key name e.g. Enter, Tab, ArrowDown") },
    async ({ key }: { key: string }) => {
      const b = await getBrowser();
      await b.getPage().keyboard.press(key);
      return { content: [{ type: "text", text: `Pressed ${key}` }] };
    }
  );

  server.tool(
    "screenshot",
    "Take a screenshot of the current page. Returns a PNG image.",
    {},
    async () => {
      const b = await getBrowser();
      const page = b.getPage();
      // Return the image inline rather than writing a fixed /tmp path that concurrent
      // sessions would overwrite and that the caller could never read.
      const png = await page.screenshot({ type: "png" });
      return {
        content: [{ type: "image", data: png.toString("base64"), mimeType: "image/png" }],
      };
    }
  );

  server.tool(
    "get_url",
    "Get the current browser URL.",
    {},
    async () => {
      const data = await cmd<{ url: string }>({ action: "url" });
      return { content: [{ type: "text", text: data.url }] };
    }
  );

  server.tool(
    "close_browser",
    "Close the browser session and free resources.",
    {},
    async () => {
      if (browser) {
        const b = browser.getBrowser();
        // Best-effort close: a failure here must not prevent resetting the shared handle.
        if (b) await b.close().catch(() => {});
        browser = null;
      }
      return { content: [{ type: "text", text: "Browser closed" }] };
    }
  );

  return server;
}
|
||||
|
||||
// CRITICAL: Do NOT add express.json() or any body-parsing middleware here.
// SSEServerTransport.handlePostMessage() reads the raw request body as a Node.js readable
// stream. If express.json() pre-consumes the stream, every MCP initialize handshake fails
// with HTTP 400 "stream is not readable", silently preventing all tool execution.
const app = express();
// Live SSE transports keyed by the transport's sessionId.
const transports: Map<string, SSEServerTransport> = new Map();

// Health probe: reports status and the tool names this process exposes.
app.get("/health", (_req: Request, res: Response) => {
  const tools = [
    "navigate", "snapshot", "click", "fill", "get_text",
    "press_key", "screenshot", "get_url", "close_browser",
  ];
  if (PERPLEXICA_URL) tools.unshift("perplexica_search");
  res.json({ status: "ok", tools });
});

// Opens an SSE session: one transport and one McpServer per connection.
app.get("/sse", async (_req: Request, res: Response) => {
  const transport = new SSEServerTransport("/messages", res);
  const id = transport.sessionId;
  transports.set(id, transport);
  // Register cleanup before awaiting, so a client that disconnects while the
  // connection is being set up does not leave a stale entry behind.
  res.on("close", () => transports.delete(id));

  try {
    const server = buildMcpServer();
    await server.connect(transport);
  } catch (err) {
    // Express 4 does not catch rejections from async handlers; handle them here.
    transports.delete(id);
    console.error(`Failed to establish SSE session ${id}:`, err);
    if (!res.headersSent) res.status(500).json({ error: "Failed to establish SSE session" });
    else res.end();
  }
});

// Receives client messages and routes them to the transport named by ?sessionId=…
app.post("/messages", async (req: Request, res: Response) => {
  const { sessionId } = req.query;
  // A repeated or missing sessionId parameter is not a string and matches no session.
  const transport = typeof sessionId === "string" ? transports.get(sessionId) : undefined;
  if (!transport) {
    res.status(404).json({ error: "Session not found" });
    return;
  }

  try {
    await transport.handlePostMessage(req, res);
  } catch (err) {
    console.error("Failed to handle MCP message:", err);
    if (!res.headersSent) res.status(500).json({ error: "Failed to handle message" });
  }
});

app.listen(PORT, () => {
  console.log(`agent-browser MCP server listening on port ${PORT}`);
  if (PERPLEXICA_URL) console.log(`Perplexica integration enabled: ${PERPLEXICA_URL}`);
  else console.log("Perplexica integration disabled (set PERPLEXICA_URL to enable)");
});
|
||||
14
packages/mcp-servers/agent-browser/tsconfig.json
Normal file
14
packages/mcp-servers/agent-browser/tsconfig.json
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "NodeNext",
|
||||
"moduleResolution": "NodeNext",
|
||||
"outDir": "dist",
|
||||
"rootDir": "src",
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"declaration": true
|
||||
},
|
||||
"include": ["src"]
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue