diff --git a/src/fetch/README.md b/src/fetch/README.md index 5324e50731..0cbdf3819a 100644 --- a/src/fetch/README.md +++ b/src/fetch/README.md @@ -2,8 +2,17 @@ A Model Context Protocol server that provides web content fetching capabilities. This server enables LLMs to retrieve and process content from web pages, converting HTML to markdown for easier consumption. -> [!CAUTION] -> This server can access local/internal IP addresses and may represent a security risk. Exercise caution when using this MCP server to ensure this does not expose any sensitive data. +## Security + +**By default, this server blocks access to local/internal IP addresses** (127.0.0.0/8, 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, etc.) to prevent access to internal networks and services. This reduces the risk of the server being used to reach services that are not meant to be publicly accessible (server-side request forgery). + +You can customize security settings using the following options: +- `--allowed-hosts`: Specify allowed hostnames/domains (supports wildcards like `*.example.com`) +- `--allow-private-ips`: Allow access to private/internal IP ranges +- `--blocked-ip-ranges`: Specify additional CIDR ranges to block + +> [!TIP] +> For maximum security in production environments, use `--allowed-hosts` to explicitly allow only the domains you need to access. The fetch tool will truncate the response, but by using the `start_index` argument, you can specify where to start the content extraction. This lets models read a webpage in chunks, until they find the information they need. @@ -168,6 +177,44 @@ This can be customized by adding the argument `--user-agent=YourUserAgent` to th The server can be configured to use a proxy by using the `--proxy-url` argument. 
+### Security Configuration Examples + +**Allow specific domains only:** +```json +{ + "mcpServers": { + "fetch": { + "command": "uvx", + "args": ["mcp-server-fetch", "--allowed-hosts", "example.com", "*.github.com", "docs.python.org"] + } + } +} +``` + +**Allow internal IPs for development (NOT recommended for production):** +```json +{ + "mcpServers": { + "fetch": { + "command": "uvx", + "args": ["mcp-server-fetch", "--allow-private-ips"] + } + } +} +``` + +**Custom blocked IP ranges:** +```json +{ + "mcpServers": { + "fetch": { + "command": "uvx", + "args": ["mcp-server-fetch", "--blocked-ip-ranges", "203.0.113.0/24", "198.51.100.0/24"] + } + } +} +``` + ## Debugging You can use the MCP inspector to debug the server. For uvx installations: diff --git a/src/fetch/src/mcp_server_fetch/__init__.py b/src/fetch/src/mcp_server_fetch/__init__.py index 09744ce319..e09a61d5ad 100644 --- a/src/fetch/src/mcp_server_fetch/__init__.py +++ b/src/fetch/src/mcp_server_fetch/__init__.py @@ -16,9 +16,33 @@ def main(): help="Ignore robots.txt restrictions", ) parser.add_argument("--proxy-url", type=str, help="Proxy URL to use for requests") + parser.add_argument( + "--allowed-hosts", + type=str, + nargs="*", + help="List of allowed hostnames/domains (supports wildcards like *.example.com). 
If not specified, all hosts are allowed unless blocked by IP restrictions.", + ) + parser.add_argument( + "--allow-private-ips", + action="store_true", + help="Allow access to private/internal IP ranges (127.0.0.0/8, 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, etc.)", + ) + parser.add_argument( + "--blocked-ip-ranges", + type=str, + nargs="*", + help="Custom list of CIDR ranges to block (in addition to or instead of default private ranges)", + ) args = parser.parse_args() - asyncio.run(serve(args.user_agent, args.ignore_robots_txt, args.proxy_url)) + asyncio.run(serve( + args.user_agent, + args.ignore_robots_txt, + args.proxy_url, + args.allowed_hosts, + args.allow_private_ips, + args.blocked_ip_ranges, + )) if __name__ == "__main__": diff --git a/src/fetch/src/mcp_server_fetch/server.py b/src/fetch/src/mcp_server_fetch/server.py index 2df9d3b604..e2cb24c054 100644 --- a/src/fetch/src/mcp_server_fetch/server.py +++ b/src/fetch/src/mcp_server_fetch/server.py @@ -1,5 +1,7 @@ from typing import Annotated, Tuple from urllib.parse import urlparse, urlunparse +import ipaddress +import socket import markdownify import readabilipy.simple_json @@ -23,6 +25,131 @@ DEFAULT_USER_AGENT_AUTONOMOUS = "ModelContextProtocol/1.0 (Autonomous; +https://github.com/modelcontextprotocol/servers)" DEFAULT_USER_AGENT_MANUAL = "ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotocol/servers)" +# Default blocked IP ranges (local/internal networks) +DEFAULT_BLOCKED_IP_RANGES = [ + "127.0.0.0/8", # Loopback + "10.0.0.0/8", # Private Class A + "172.16.0.0/12", # Private Class B + "192.168.0.0/16", # Private Class C + "169.254.0.0/16", # Link-local + "224.0.0.0/4", # Multicast + "::1/128", # IPv6 loopback + "fc00::/7", # IPv6 unique local + "fe80::/10", # IPv6 link-local +] + + +def is_ip_in_ranges(ip_str: str, ranges: list[str]) -> bool: + """Check if an IP address is within any of the specified CIDR ranges. 
+ + Args: + ip_str: IP address string to check + ranges: List of CIDR range strings + + Returns: + True if IP is in any of the ranges, False otherwise + """ + try: + ip = ipaddress.ip_address(ip_str) + for range_str in ranges: + try: + network = ipaddress.ip_network(range_str, strict=False) + if ip in network: + return True + except ValueError: + continue + return False + except ValueError: + return False + + +def resolve_hostname_to_ips(hostname: str) -> list[str]: + """Resolve a hostname to its IP addresses. + + Args: + hostname: Hostname to resolve + + Returns: + List of IP addresses as strings + + Raises: + socket.gaierror: If hostname cannot be resolved + """ + try: + # Get both IPv4 and IPv6 addresses + addr_info = socket.getaddrinfo(hostname, None, family=socket.AF_UNSPEC, type=socket.SOCK_STREAM) + ips = list(set(info[4][0] for info in addr_info)) + return ips + except socket.gaierror: + raise + + +async def validate_url_against_allowlist( + url: str, + allowed_hosts: list[str] | None = None, + blocked_ip_ranges: list[str] | None = None, + allow_private_ips: bool = False +) -> None: + """Validate a URL against host allowlist and IP range restrictions. + + Args: + url: URL to validate + allowed_hosts: List of allowed hostnames/domains. If None, all hosts allowed by IP rules + blocked_ip_ranges: List of CIDR ranges to block. 
If None, uses DEFAULT_BLOCKED_IP_RANGES + allow_private_ips: If True, allows access to private/internal IP ranges + + Raises: + McpError: If URL is not allowed + """ + if blocked_ip_ranges is None: + blocked_ip_ranges = DEFAULT_BLOCKED_IP_RANGES if not allow_private_ips else [] + + parsed = urlparse(url) + hostname = parsed.hostname + + if not hostname: + raise McpError(ErrorData( + code=INVALID_PARAMS, + message="Invalid URL: no hostname found" + )) + + # Check against allowed hosts list if provided + if allowed_hosts is not None: + host_allowed = False + for allowed_host in allowed_hosts: + if allowed_host.startswith('*.'): + # Wildcard domain matching + domain_suffix = allowed_host[2:] + if hostname == domain_suffix or hostname.endswith('.' + domain_suffix): + host_allowed = True + break + elif hostname == allowed_host: + host_allowed = True + break + + if not host_allowed: + raise McpError(ErrorData( + code=INTERNAL_ERROR, + message=f"Host '{hostname}' is not in the allowed hosts list" + )) + + # Resolve hostname to IP addresses and check against blocked ranges + if blocked_ip_ranges: + try: + ips = resolve_hostname_to_ips(hostname) + except socket.gaierror as e: + raise McpError(ErrorData( + code=INTERNAL_ERROR, + message=f"Failed to resolve hostname '{hostname}': {e}" + )) + + for ip in ips: + if is_ip_in_ranges(ip, blocked_ip_ranges): + raise McpError(ErrorData( + code=INTERNAL_ERROR, + message=f"Access to IP address '{ip}' (resolved from '{hostname}') is blocked as it falls within restricted IP ranges" + )) + def extract_content_from_html(html: str) -> str: """Extract and convert HTML content to Markdown format. @@ -182,6 +309,9 @@ async def serve( custom_user_agent: str | None = None, ignore_robots_txt: bool = False, proxy_url: str | None = None, + allowed_hosts: list[str] | None = None, + allow_private_ips: bool = False, + blocked_ip_ranges: list[str] | None = None, ) -> None: """Run the fetch MCP server. 
@@ -189,6 +319,9 @@ async def serve( custom_user_agent: Optional custom User-Agent string to use for requests ignore_robots_txt: Whether to ignore robots.txt restrictions proxy_url: Optional proxy URL to use for requests + allowed_hosts: Optional list of allowed hostnames/domains (supports wildcards like *.example.com) + allow_private_ips: Whether to allow access to private/internal IP ranges + blocked_ip_ranges: Optional list of custom CIDR ranges to block """ server = Server("mcp-fetch") user_agent_autonomous = custom_user_agent or DEFAULT_USER_AGENT_AUTONOMOUS @@ -231,6 +364,11 @@ async def call_tool(name, arguments: dict) -> list[TextContent]: if not url: raise McpError(ErrorData(code=INVALID_PARAMS, message="URL is required")) + # Validate URL against allowlist and IP restrictions + await validate_url_against_allowlist( + url, allowed_hosts, blocked_ip_ranges, allow_private_ips + ) + if not ignore_robots_txt: await check_may_autonomously_fetch_url(url, user_agent_autonomous, proxy_url) @@ -262,6 +400,11 @@ async def get_prompt(name: str, arguments: dict | None) -> GetPromptResult: url = arguments["url"] try: + # Validate URL against allowlist and IP restrictions + await validate_url_against_allowlist( + url, allowed_hosts, blocked_ip_ranges, allow_private_ips + ) + content, prefix = await fetch_url(url, user_agent_manual, proxy_url=proxy_url) # TODO: after SDK bug is addressed, don't catch the exception except McpError as e: