mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-05 01:05:51 +00:00
Ensure custom user agents are applied to direct web requests and disabling Jina Reader forces the local readability path. Made-with: Cursor
176 lines
5.4 KiB
Python
176 lines
5.4 KiB
Python
"""Tests for web_fetch SSRF protection and untrusted content marking."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import socket
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
|
|
from nanobot.agent.tools.web import WebFetchTool
|
|
from nanobot.config.schema import WebFetchConfig
|
|
|
|
|
|
def _fake_resolve_private(hostname, port, family=0, type_=0):
|
|
return [(socket.AF_INET, socket.SOCK_STREAM, 0, "", ("169.254.169.254", 0))]
|
|
|
|
|
|
def _fake_resolve_public(hostname, port, family=0, type_=0):
|
|
return [(socket.AF_INET, socket.SOCK_STREAM, 0, "", ("93.184.216.34", 0))]
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_web_fetch_blocks_private_ip():
|
|
tool = WebFetchTool()
|
|
with patch("nanobot.security.network.socket.getaddrinfo", _fake_resolve_private):
|
|
result = await tool.execute(url="http://169.254.169.254/computeMetadata/v1/")
|
|
data = json.loads(result)
|
|
assert "error" in data
|
|
assert "private" in data["error"].lower() or "blocked" in data["error"].lower()
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_web_fetch_blocks_localhost():
|
|
tool = WebFetchTool()
|
|
def _resolve_localhost(hostname, port, family=0, type_=0):
|
|
return [(socket.AF_INET, socket.SOCK_STREAM, 0, "", ("127.0.0.1", 0))]
|
|
with patch("nanobot.security.network.socket.getaddrinfo", _resolve_localhost):
|
|
result = await tool.execute(url="http://localhost/admin")
|
|
data = json.loads(result)
|
|
assert "error" in data
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_web_fetch_result_contains_untrusted_flag():
|
|
"""When fetch succeeds, result JSON must include untrusted=True and the banner."""
|
|
tool = WebFetchTool()
|
|
|
|
fake_html = "<html><head><title>Test</title></head><body><p>Hello world</p></body></html>"
|
|
|
|
|
|
class FakeResponse:
|
|
status_code = 200
|
|
url = "https://example.com/page"
|
|
text = fake_html
|
|
headers = {"content-type": "text/html"}
|
|
def raise_for_status(self): pass
|
|
def json(self): return {}
|
|
|
|
async def _fake_get(self, url, **kwargs):
|
|
return FakeResponse()
|
|
|
|
with patch("nanobot.security.network.socket.getaddrinfo", _fake_resolve_public), \
|
|
patch("httpx.AsyncClient.get", _fake_get):
|
|
result = await tool.execute(url="https://example.com/page")
|
|
|
|
data = json.loads(result)
|
|
assert data.get("untrusted") is True
|
|
assert "[External content" in data.get("text", "")
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_web_fetch_can_skip_jina_and_use_custom_user_agent(monkeypatch):
|
|
tool = WebFetchTool(
|
|
config=WebFetchConfig(use_jina_reader=False),
|
|
user_agent="nanobot-test-agent",
|
|
)
|
|
seen_headers: list[dict] = []
|
|
|
|
async def _fail_jina(*args, **kwargs):
|
|
raise AssertionError("Jina Reader should be skipped when disabled")
|
|
|
|
class FakeStreamResponse:
|
|
headers = {"content-type": "text/html"}
|
|
url = "https://example.com/page"
|
|
|
|
async def __aenter__(self):
|
|
return self
|
|
|
|
async def __aexit__(self, exc_type, exc, tb):
|
|
return False
|
|
|
|
class FakeResponse:
|
|
status_code = 200
|
|
url = "https://example.com/page"
|
|
text = "<html><head><title>Test</title></head><body><p>Hello world</p></body></html>"
|
|
headers = {"content-type": "text/html"}
|
|
|
|
def raise_for_status(self):
|
|
return None
|
|
|
|
class FakeClient:
|
|
def __init__(self, *args, **kwargs):
|
|
pass
|
|
|
|
async def __aenter__(self):
|
|
return self
|
|
|
|
async def __aexit__(self, exc_type, exc, tb):
|
|
return False
|
|
|
|
def stream(self, method, url, headers=None):
|
|
seen_headers.append(headers or {})
|
|
return FakeStreamResponse()
|
|
|
|
async def get(self, url, headers=None):
|
|
seen_headers.append(headers or {})
|
|
return FakeResponse()
|
|
|
|
monkeypatch.setattr(tool, "_fetch_jina", _fail_jina)
|
|
monkeypatch.setattr("nanobot.agent.tools.web.httpx.AsyncClient", FakeClient)
|
|
|
|
with patch("nanobot.security.network.socket.getaddrinfo", _fake_resolve_public):
|
|
result = await tool.execute(url="https://example.com/page")
|
|
|
|
data = json.loads(result)
|
|
assert data["extractor"] == "readability"
|
|
assert [headers["User-Agent"] for headers in seen_headers] == [
|
|
"nanobot-test-agent",
|
|
"nanobot-test-agent",
|
|
]
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_web_fetch_blocks_private_redirect_before_returning_image(monkeypatch):
|
|
tool = WebFetchTool()
|
|
|
|
class FakeStreamResponse:
|
|
headers = {"content-type": "image/png"}
|
|
url = "http://127.0.0.1/secret.png"
|
|
content = b"\x89PNG\r\n\x1a\n"
|
|
|
|
async def __aenter__(self):
|
|
return self
|
|
|
|
async def __aexit__(self, exc_type, exc, tb):
|
|
return False
|
|
|
|
async def aread(self):
|
|
return self.content
|
|
|
|
def raise_for_status(self):
|
|
return None
|
|
|
|
class FakeClient:
|
|
def __init__(self, *args, **kwargs):
|
|
pass
|
|
|
|
async def __aenter__(self):
|
|
return self
|
|
|
|
async def __aexit__(self, exc_type, exc, tb):
|
|
return False
|
|
|
|
def stream(self, method, url, headers=None):
|
|
return FakeStreamResponse()
|
|
|
|
monkeypatch.setattr("nanobot.agent.tools.web.httpx.AsyncClient", FakeClient)
|
|
|
|
with patch("nanobot.security.network.socket.getaddrinfo", _fake_resolve_public):
|
|
result = await tool.execute(url="https://example.com/image.png")
|
|
|
|
data = json.loads(result)
|
|
assert "error" in data
|
|
assert "redirect blocked" in data["error"].lower()
|