mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-05 01:05:51 +00:00
test(web): cover configurable fetch behavior
Ensure custom user agents are applied to direct web requests and that disabling Jina Reader forces the local readability path.

Made-with: Cursor
This commit is contained in:
parent
18432c313f
commit
f4d8783f5e
@ -11,8 +11,7 @@ from typing import Any
|
||||
from loguru import logger
|
||||
|
||||
from nanobot.agent.hook import AgentHook, AgentHookContext
|
||||
from nanobot.utils.prompt_templates import render_template
|
||||
from nanobot.agent.runner import AgentRunSpec, AgentRunner
|
||||
from nanobot.agent.runner import AgentRunner, AgentRunSpec
|
||||
from nanobot.agent.skills import BUILTIN_SKILLS_DIR
|
||||
from nanobot.agent.tools.filesystem import EditFileTool, ListDirTool, ReadFileTool, WriteFileTool
|
||||
from nanobot.agent.tools.registry import ToolRegistry
|
||||
@ -23,6 +22,7 @@ from nanobot.bus.events import InboundMessage
|
||||
from nanobot.bus.queue import MessageBus
|
||||
from nanobot.config.schema import ExecToolConfig, WebToolsConfig
|
||||
from nanobot.providers.base import LLMProvider
|
||||
from nanobot.utils.prompt_templates import render_template
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
|
||||
@ -18,7 +18,7 @@ from nanobot.agent.tools.schema import IntegerSchema, StringSchema, tool_paramet
|
||||
from nanobot.utils.helpers import build_image_content_blocks
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from nanobot.config.schema import WebSearchConfig, WebFetchConfig
|
||||
from nanobot.config.schema import WebFetchConfig, WebSearchConfig
|
||||
|
||||
# Shared constants
|
||||
_DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_7_2) AppleWebKit/537.36"
|
||||
@ -159,7 +159,11 @@ class WebSearchTool(Tool):
|
||||
r = await client.get(
|
||||
"https://api.search.brave.com/res/v1/web/search",
|
||||
params={"q": query, "count": n},
|
||||
headers={"Accept": "application/json", "X-Subscription-Token": api_key},
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
"X-Subscription-Token": api_key,
|
||||
"User-Agent": self.user_agent,
|
||||
},
|
||||
timeout=10.0,
|
||||
)
|
||||
r.raise_for_status()
|
||||
@ -180,7 +184,7 @@ class WebSearchTool(Tool):
|
||||
async with httpx.AsyncClient(proxy=self.proxy) as client:
|
||||
r = await client.post(
|
||||
"https://api.tavily.com/search",
|
||||
headers={"Authorization": f"Bearer {api_key}"},
|
||||
headers={"Authorization": f"Bearer {api_key}", "User-Agent": self.user_agent},
|
||||
json={"query": query, "max_results": n},
|
||||
timeout=15.0,
|
||||
)
|
||||
@ -217,7 +221,11 @@ class WebSearchTool(Tool):
|
||||
logger.warning("JINA_API_KEY not set, falling back to DuckDuckGo")
|
||||
return await self._search_duckduckgo(query, n)
|
||||
try:
|
||||
headers = {"Accept": "application/json", "Authorization": f"Bearer {api_key}"}
|
||||
headers = {
|
||||
"Accept": "application/json",
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"User-Agent": self.user_agent,
|
||||
}
|
||||
encoded_query = quote(query, safe="")
|
||||
async with httpx.AsyncClient(proxy=self.proxy) as client:
|
||||
r = await client.get(
|
||||
@ -246,7 +254,7 @@ class WebSearchTool(Tool):
|
||||
r = await client.get(
|
||||
"https://kagi.com/api/v0/search",
|
||||
params={"q": query, "limit": n},
|
||||
headers={"Authorization": f"Bot {api_key}"},
|
||||
headers={"Authorization": f"Bot {api_key}", "User-Agent": self.user_agent},
|
||||
timeout=10.0,
|
||||
)
|
||||
r.raise_for_status()
|
||||
@ -316,8 +324,15 @@ class WebFetchTool(Tool):
|
||||
def read_only(self) -> bool:
|
||||
return True
|
||||
|
||||
async def execute(self, url: str, extractMode: str = "markdown", maxChars: int | None = None, **kwargs: Any) -> Any:
|
||||
max_chars = maxChars or self.max_chars
|
||||
async def execute(
|
||||
self,
|
||||
url: str,
|
||||
extract_mode: str = "markdown",
|
||||
max_chars: int | None = None,
|
||||
**kwargs: Any,
|
||||
) -> Any:
|
||||
extract_mode = kwargs.pop("extractMode", extract_mode)
|
||||
max_chars = kwargs.pop("maxChars", max_chars) or self.max_chars
|
||||
is_valid, error_msg = _validate_url_safe(url)
|
||||
if not is_valid:
|
||||
return json.dumps({"error": f"URL validation failed: {error_msg}", "url": url}, ensure_ascii=False)
|
||||
@ -344,7 +359,7 @@ class WebFetchTool(Tool):
|
||||
if self.config.use_jina_reader:
|
||||
result = await self._fetch_jina(url, max_chars)
|
||||
if result is None:
|
||||
result = await self._fetch_readability(url, extractMode, max_chars)
|
||||
result = await self._fetch_readability(url, extract_mode, max_chars)
|
||||
return result
|
||||
|
||||
async def _fetch_jina(self, url: str, max_chars: int) -> str | None:
|
||||
|
||||
@ -9,6 +9,7 @@ from unittest.mock import patch
|
||||
import pytest
|
||||
|
||||
from nanobot.agent.tools.web import WebFetchTool
|
||||
from nanobot.config.schema import WebFetchConfig
|
||||
|
||||
|
||||
def _fake_resolve_private(hostname, port, family=0, type_=0):
|
||||
@ -47,7 +48,6 @@ async def test_web_fetch_result_contains_untrusted_flag():
|
||||
|
||||
fake_html = "<html><head><title>Test</title></head><body><p>Hello world</p></body></html>"
|
||||
|
||||
import httpx
|
||||
|
||||
class FakeResponse:
|
||||
status_code = 200
|
||||
@ -69,6 +69,68 @@ async def test_web_fetch_result_contains_untrusted_flag():
|
||||
assert "[External content" in data.get("text", "")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_web_fetch_can_skip_jina_and_use_custom_user_agent(monkeypatch):
|
||||
tool = WebFetchTool(
|
||||
config=WebFetchConfig(use_jina_reader=False),
|
||||
user_agent="nanobot-test-agent",
|
||||
)
|
||||
seen_headers: list[dict] = []
|
||||
|
||||
async def _fail_jina(*args, **kwargs):
|
||||
raise AssertionError("Jina Reader should be skipped when disabled")
|
||||
|
||||
class FakeStreamResponse:
|
||||
headers = {"content-type": "text/html"}
|
||||
url = "https://example.com/page"
|
||||
|
||||
async def __aenter__(self):
|
||||
return self
|
||||
|
||||
async def __aexit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
class FakeResponse:
|
||||
status_code = 200
|
||||
url = "https://example.com/page"
|
||||
text = "<html><head><title>Test</title></head><body><p>Hello world</p></body></html>"
|
||||
headers = {"content-type": "text/html"}
|
||||
|
||||
def raise_for_status(self):
|
||||
return None
|
||||
|
||||
class FakeClient:
|
||||
def __init__(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
async def __aenter__(self):
|
||||
return self
|
||||
|
||||
async def __aexit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
def stream(self, method, url, headers=None):
|
||||
seen_headers.append(headers or {})
|
||||
return FakeStreamResponse()
|
||||
|
||||
async def get(self, url, headers=None):
|
||||
seen_headers.append(headers or {})
|
||||
return FakeResponse()
|
||||
|
||||
monkeypatch.setattr(tool, "_fetch_jina", _fail_jina)
|
||||
monkeypatch.setattr("nanobot.agent.tools.web.httpx.AsyncClient", FakeClient)
|
||||
|
||||
with patch("nanobot.security.network.socket.getaddrinfo", _fake_resolve_public):
|
||||
result = await tool.execute(url="https://example.com/page")
|
||||
|
||||
data = json.loads(result)
|
||||
assert data["extractor"] == "readability"
|
||||
assert [headers["User-Agent"] for headers in seen_headers] == [
|
||||
"nanobot-test-agent",
|
||||
"nanobot-test-agent",
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_web_fetch_blocks_private_redirect_before_returning_image(monkeypatch):
|
||||
tool = WebFetchTool()
|
||||
|
||||
@ -7,8 +7,16 @@ from nanobot.agent.tools.web import WebSearchTool
|
||||
from nanobot.config.schema import WebSearchConfig
|
||||
|
||||
|
||||
def _tool(provider: str = "brave", api_key: str = "", base_url: str = "") -> WebSearchTool:
|
||||
return WebSearchTool(config=WebSearchConfig(provider=provider, api_key=api_key, base_url=base_url))
|
||||
def _tool(
|
||||
provider: str = "brave",
|
||||
api_key: str = "",
|
||||
base_url: str = "",
|
||||
user_agent: str | None = None,
|
||||
) -> WebSearchTool:
|
||||
return WebSearchTool(
|
||||
config=WebSearchConfig(provider=provider, api_key=api_key, base_url=base_url),
|
||||
user_agent=user_agent,
|
||||
)
|
||||
|
||||
|
||||
def _response(status: int = 200, json: dict | None = None) -> httpx.Response:
|
||||
@ -42,12 +50,13 @@ async def test_brave_search(monkeypatch):
|
||||
async def mock_get(self, url, **kw):
|
||||
assert "brave" in url
|
||||
assert kw["headers"]["X-Subscription-Token"] == "brave-key"
|
||||
assert kw["headers"]["User-Agent"] == "nanobot-search-test"
|
||||
return _response(json={
|
||||
"web": {"results": [{"title": "NanoBot", "url": "https://example.com", "description": "AI assistant"}]}
|
||||
})
|
||||
|
||||
monkeypatch.setattr(httpx.AsyncClient, "get", mock_get)
|
||||
tool = _tool(provider="brave", api_key="brave-key")
|
||||
tool = _tool(provider="brave", api_key="brave-key", user_agent="nanobot-search-test")
|
||||
result = await tool.execute(query="nanobot", count=1)
|
||||
assert "NanoBot" in result
|
||||
assert "https://example.com" in result
|
||||
@ -58,12 +67,13 @@ async def test_tavily_search(monkeypatch):
|
||||
async def mock_post(self, url, **kw):
|
||||
assert "tavily" in url
|
||||
assert kw["headers"]["Authorization"] == "Bearer tavily-key"
|
||||
assert kw["headers"]["User-Agent"] == "nanobot-search-test"
|
||||
return _response(json={
|
||||
"results": [{"title": "OpenClaw", "url": "https://openclaw.io", "content": "Framework"}]
|
||||
})
|
||||
|
||||
monkeypatch.setattr(httpx.AsyncClient, "post", mock_post)
|
||||
tool = _tool(provider="tavily", api_key="tavily-key")
|
||||
tool = _tool(provider="tavily", api_key="tavily-key", user_agent="nanobot-search-test")
|
||||
result = await tool.execute(query="openclaw")
|
||||
assert "OpenClaw" in result
|
||||
assert "https://openclaw.io" in result
|
||||
@ -73,12 +83,13 @@ async def test_tavily_search(monkeypatch):
|
||||
async def test_searxng_search(monkeypatch):
|
||||
async def mock_get(self, url, **kw):
|
||||
assert "searx.example" in url
|
||||
assert kw["headers"]["User-Agent"] == "nanobot-search-test"
|
||||
return _response(json={
|
||||
"results": [{"title": "Result", "url": "https://example.com", "content": "SearXNG result"}]
|
||||
})
|
||||
|
||||
monkeypatch.setattr(httpx.AsyncClient, "get", mock_get)
|
||||
tool = _tool(provider="searxng", base_url="https://searx.example")
|
||||
tool = _tool(provider="searxng", base_url="https://searx.example", user_agent="nanobot-search-test")
|
||||
result = await tool.execute(query="test")
|
||||
assert "Result" in result
|
||||
|
||||
@ -125,12 +136,13 @@ async def test_jina_search(monkeypatch):
|
||||
async def mock_get(self, url, **kw):
|
||||
assert "s.jina.ai" in str(url)
|
||||
assert kw["headers"]["Authorization"] == "Bearer jina-key"
|
||||
assert kw["headers"]["User-Agent"] == "nanobot-search-test"
|
||||
return _response(json={
|
||||
"data": [{"title": "Jina Result", "url": "https://jina.ai", "content": "AI search"}]
|
||||
})
|
||||
|
||||
monkeypatch.setattr(httpx.AsyncClient, "get", mock_get)
|
||||
tool = _tool(provider="jina", api_key="jina-key")
|
||||
tool = _tool(provider="jina", api_key="jina-key", user_agent="nanobot-search-test")
|
||||
result = await tool.execute(query="test")
|
||||
assert "Jina Result" in result
|
||||
assert "https://jina.ai" in result
|
||||
@ -141,6 +153,7 @@ async def test_kagi_search(monkeypatch):
|
||||
async def mock_get(self, url, **kw):
|
||||
assert "kagi.com/api/v0/search" in url
|
||||
assert kw["headers"]["Authorization"] == "Bot kagi-key"
|
||||
assert kw["headers"]["User-Agent"] == "nanobot-search-test"
|
||||
assert kw["params"] == {"q": "test", "limit": 2}
|
||||
return _response(json={
|
||||
"data": [
|
||||
@ -150,7 +163,7 @@ async def test_kagi_search(monkeypatch):
|
||||
})
|
||||
|
||||
monkeypatch.setattr(httpx.AsyncClient, "get", mock_get)
|
||||
tool = _tool(provider="kagi", api_key="kagi-key")
|
||||
tool = _tool(provider="kagi", api_key="kagi-key", user_agent="nanobot-search-test")
|
||||
result = await tool.execute(query="test", count=2)
|
||||
assert "Kagi Result" in result
|
||||
assert "https://kagi.com" in result
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user