Fix shell guard URL path detection

This commit is contained in:
Haisam Abbas 2026-05-20 17:16:53 +05:00 committed by Xubin Ren
parent 4f895e6307
commit e645fbcb34
2 changed files with 37 additions and 1 deletions

View File

@ -418,7 +418,7 @@ class ExecTool(Tool):
# Windows: match drive-root paths like `C:\` as well as `C:\path\to\file`, and UNC paths like `\\server\share` # Windows: match drive-root paths like `C:\` as well as `C:\path\to\file`, and UNC paths like `\\server\share`
# NOTE: `*` is required so `C:\` (nothing after the slash) is still extracted. # NOTE: `*` is required so `C:\` (nothing after the slash) is still extracted.
win_paths = re.findall( win_paths = re.findall(
r"(?:[A-Za-z]:[^\s\"'|><;]*|\\\\[^\s\"'|><;]+(?:\\[^\s\"'|><;]+)*)", r"(?<![A-Za-z])(?:[A-Za-z]:[^\s\"'|><;]*|\\\\[^\s\"'|><;]+(?:\\[^\s\"'|><;]+)*)",
command command
) )
posix_paths = re.findall(r"(?:^|[\s|>'\"])(/[^\s\"'>;|<]+)", command) # POSIX: /absolute only posix_paths = re.findall(r"(?:^|[\s|>'\"])(/[^\s\"'>;|<]+)", command) # POSIX: /absolute only

View File

@ -3,6 +3,8 @@ import subprocess
import sys import sys
from typing import Any from typing import Any
import pytest
from nanobot.agent.tools import ( from nanobot.agent.tools import (
ArraySchema, ArraySchema,
IntegerSchema, IntegerSchema,
@ -15,6 +17,7 @@ from nanobot.agent.tools import (
from nanobot.agent.tools.base import Tool from nanobot.agent.tools.base import Tool
from nanobot.agent.tools.registry import ToolRegistry from nanobot.agent.tools.registry import ToolRegistry
from nanobot.agent.tools.shell import ExecTool from nanobot.agent.tools.shell import ExecTool
from nanobot.security.network import configure_ssrf_whitelist
class SampleTool(Tool): class SampleTool(Tool):
@ -218,6 +221,39 @@ def test_exec_extract_absolute_paths_ignores_relative_posix_segments() -> None:
assert "/bin/python" not in paths assert "/bin/python" not in paths
def test_exec_extract_absolute_paths_ignores_urls() -> None:
    """Only the real filesystem path is extracted from a curl command with a URL."""
    extracted = ExecTool._extract_absolute_paths(
        'curl -s -o /dev/null -w "%{http_code}" https://www.google.com'
    )
    # The https:// URL must not show up as a path; /dev/null is the only one.
    assert extracted == ["/dev/null"]
@pytest.mark.parametrize(
    "command",
    [
        'curl -s -o /dev/null -w "%{http_code}" https://www.google.com',
        'wget -q -O - http://example.com 2>&1 | head -c 100',
        'python3 -c "import urllib.request; print(urllib.request.urlopen(\'http://example.com\').read()[:100])"',
    ],
)
def test_exec_guard_allows_public_urls(tmp_path, command: str) -> None:
    """The guard returns no error for URL-bearing commands under workspace restriction."""
    workspace = str(tmp_path)
    guard = ExecTool(restrict_to_workspace=True)
    # None from _guard_command means the command was not rejected.
    assert guard._guard_command(command, workspace) is None
def test_exec_guard_allows_whitelisted_internal_urls(tmp_path) -> None:
    """A URL inside the configured SSRF whitelist range passes the guard."""
    workspace = str(tmp_path)
    request = 'curl -s -H "Authorization: Bearer ..." http://10.10.10.3:8123/api/'
    configure_ssrf_whitelist(["10.10.10.0/24"])
    try:
        verdict = ExecTool(restrict_to_workspace=True)._guard_command(request, workspace)
        assert verdict is None
    finally:
        # Always restore an empty whitelist, even if the assertion fails.
        configure_ssrf_whitelist([])
def test_exec_extract_absolute_paths_captures_posix_absolute_paths() -> None: def test_exec_extract_absolute_paths_captures_posix_absolute_paths() -> None:
cmd = "cat /tmp/data.txt > /tmp/out.txt" cmd = "cat /tmp/data.txt > /tmp/out.txt"
paths = ExecTool._extract_absolute_paths(cmd) paths = ExecTool._extract_absolute_paths(cmd)