diff --git a/nanobot/agent/tools/web.py b/nanobot/agent/tools/web.py index e0372bead..e25e196c7 100644 --- a/nanobot/agent/tools/web.py +++ b/nanobot/agent/tools/web.py @@ -388,6 +388,9 @@ class WebFetchTool(Tool): max_chars: int | None = None, **kwargs: Any, ) -> Any: + url = url.strip().strip("`").strip('"').strip("'") + if not (url.startswith("http://") or url.startswith("https://")): + return json.dumps({"error": "Invalid URL after cleaning", "url": url}, ensure_ascii=False) extract_mode = kwargs.pop("extractMode", extract_mode) max_chars = kwargs.pop("maxChars", max_chars) or self.max_chars is_valid, error_msg = _validate_url_safe(url) diff --git a/tests/tools/test_web_fetch_url_sanitization.py b/tests/tools/test_web_fetch_url_sanitization.py new file mode 100644 index 000000000..e44b206a3 --- /dev/null +++ b/tests/tools/test_web_fetch_url_sanitization.py @@ -0,0 +1,139 @@ +"""Tests for web_fetch URL sanitization (backtick/quote stripping).""" + +from __future__ import annotations + +import json +from unittest.mock import patch + +import pytest + +from nanobot.agent.tools.web import WebFetchTool, _validate_url + + +def _fake_resolve_public(hostname, port, family=0, type_=0): + import socket + return [(socket.AF_INET, socket.SOCK_STREAM, 0, "", ("93.184.216.34", 0))] + + +class FakeResponse: + status_code = 200 + url = "https://example.com/page" + text = "T

ok

" + headers = {"content-type": "text/html"} + def raise_for_status(self): pass + def json(self): return {} + + +class FakeStreamResponse: + headers = {"content-type": "text/html"} + url = "https://example.com/page" + async def __aenter__(self): return self + async def __aexit__(self, *a): return False + + +class FakeClient: + def __init__(self, *a, **kw): pass + async def __aenter__(self): return self + async def __aexit__(self, *a): return False + def stream(self, method, url, **kw): + return FakeStreamResponse() + async def get(self, url, **kw): + return FakeResponse() + + +def _patch_env(): + return patch("nanobot.security.network.socket.getaddrinfo", _fake_resolve_public), \ + patch("nanobot.agent.tools.web.httpx.AsyncClient", FakeClient) + + +# --- urlparse / _validate_url level tests --- + +@pytest.mark.parametrize("dirty_url", [ + "`https://example.com/page`", + " `https://example.com/page` ", + '"https://example.com/page"', + "'https://example.com/page'", + ' "https://example.com/page" ', +]) +def test_dirty_urls_fail_validation(dirty_url): + is_valid, msg = _validate_url(dirty_url) + assert not is_valid + + +def test_clean_url_passes_validation(): + is_valid, msg = _validate_url("https://example.com/page") + assert is_valid + + +def test_backtick_url_produces_empty_scheme_in_urlparse(): + from urllib.parse import urlparse + p = urlparse("`https://example.com/page`") + assert p.scheme == "" + assert p.netloc == "" + + +# --- WebFetchTool.execute integration tests --- + +@pytest.mark.asyncio +async def test_execute_strips_backticks_and_succeeds(): + tool = WebFetchTool() + with _patch_env()[0], _patch_env()[1]: + result = await tool.execute(url="`https://example.com/page`") + data = json.loads(result) + assert "error" not in data, f"unexpected error: {data}" + + +@pytest.mark.asyncio +async def test_execute_strips_double_quotes_and_succeeds(): + tool = WebFetchTool() + with _patch_env()[0], _patch_env()[1]: + result = await tool.execute(url='"https://example.com/page"') + data = json.loads(result) + assert "error" not in data, f"unexpected error: {data}" + + +@pytest.mark.asyncio +async def test_execute_strips_single_quotes_and_succeeds(): + tool = WebFetchTool() + with _patch_env()[0], _patch_env()[1]: + result = await tool.execute(url="'https://example.com/page'") + data = json.loads(result) + assert "error" not in data, f"unexpected error: {data}" + + +@pytest.mark.asyncio +async def test_execute_strips_space_and_backticks(): + tool = WebFetchTool() + with _patch_env()[0], _patch_env()[1]: + result = await tool.execute(url=" `https://example.com/page` ") + data = json.loads(result) + assert "error" not in data, f"unexpected error: {data}" + + +# --- startswith guard tests --- + +@pytest.mark.asyncio +async def test_execute_rejects_non_http_url_after_cleaning(): + tool = WebFetchTool() + result = await tool.execute(url="ftp://example.com/file") + data = json.loads(result) + assert "error" in data + assert "Invalid URL" in data["error"] + + +@pytest.mark.asyncio +async def test_execute_rejects_garbage_after_cleaning(): + tool = WebFetchTool() + result = await tool.execute(url="`not a url at all`") + data = json.loads(result) + assert "error" in data + assert "Invalid URL" in data["error"] + + +@pytest.mark.asyncio +async def test_execute_rejects_bare_domain_after_cleaning(): + tool = WebFetchTool() + result = await tool.execute(url="`example.com/page`") + data = json.loads(result) + assert "error" in data + assert "Invalid URL" in data["error"]