From e645fbcb34ee61884167c6b2b260545d6d3c8aba Mon Sep 17 00:00:00 2001 From: Haisam Abbas Date: Wed, 20 May 2026 17:16:53 +0500 Subject: [PATCH] fix shell guard url path detection --- nanobot/agent/tools/shell.py | 2 +- tests/tools/test_tool_validation.py | 36 +++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/nanobot/agent/tools/shell.py b/nanobot/agent/tools/shell.py index 0252b9746..7e0ef57a8 100644 --- a/nanobot/agent/tools/shell.py +++ b/nanobot/agent/tools/shell.py @@ -418,7 +418,7 @@ class ExecTool(Tool): # Windows: match drive-root paths like `C:\` as well as `C:\path\to\file`, and UNC paths like `\\server\share` # NOTE: `*` is required so `C:\` (nothing after the slash) is still extracted. win_paths = re.findall( - r"(?:[A-Za-z]:[^\s\"'|><;]*|\\\\[^\s\"'|><;]+(?:\\[^\s\"'|><;]+)*)", + r"(?<;]*|\\\\[^\s\"'|><;]+(?:\\[^\s\"'|><;]+)*)", command ) posix_paths = re.findall(r"(?:^|[\s|>'\"])(/[^\s\"'>;|<]+)", command) # POSIX: /absolute only diff --git a/tests/tools/test_tool_validation.py b/tests/tools/test_tool_validation.py index 42620dcc6..188a8952f 100644 --- a/tests/tools/test_tool_validation.py +++ b/tests/tools/test_tool_validation.py @@ -3,6 +3,8 @@ import subprocess import sys from typing import Any +import pytest + from nanobot.agent.tools import ( ArraySchema, IntegerSchema, @@ -15,6 +17,7 @@ from nanobot.agent.tools import ( from nanobot.agent.tools.base import Tool from nanobot.agent.tools.registry import ToolRegistry from nanobot.agent.tools.shell import ExecTool +from nanobot.security.network import configure_ssrf_whitelist class SampleTool(Tool): @@ -218,6 +221,39 @@ def test_exec_extract_absolute_paths_ignores_relative_posix_segments() -> None: assert "/bin/python" not in paths +def test_exec_extract_absolute_paths_ignores_urls() -> None: + cmd = 'curl -s -o /dev/null -w "%{http_code}" https://www.google.com' + paths = ExecTool._extract_absolute_paths(cmd) + assert paths == ["/dev/null"] + + +@pytest.mark.parametrize( + "command", + [ + 'curl -s -o /dev/null -w "%{http_code}" https://www.google.com', + 'wget -q -O - http://example.com 2>&1 | head -c 100', + 'python3 -c "import urllib.request; print(urllib.request.urlopen(\'http://example.com\').read()[:100])"', + ], +) +def test_exec_guard_allows_public_urls(tmp_path, command: str) -> None: + tool = ExecTool(restrict_to_workspace=True) + error = tool._guard_command(command, str(tmp_path)) + assert error is None + + +def test_exec_guard_allows_whitelisted_internal_urls(tmp_path) -> None: + configure_ssrf_whitelist(["10.10.10.0/24"]) + try: + tool = ExecTool(restrict_to_workspace=True) + error = tool._guard_command( + 'curl -s -H "Authorization: Bearer ..." http://10.10.10.3:8123/api/', + str(tmp_path), + ) + assert error is None + finally: + configure_ssrf_whitelist([]) + + def test_exec_extract_absolute_paths_captures_posix_absolute_paths() -> None: cmd = "cat /tmp/data.txt > /tmp/out.txt" paths = ExecTool._extract_absolute_paths(cmd)