mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-07 02:05:51 +00:00
PR #3493 promoted every shell `_guard_command` rejection to a turn-fatal RuntimeError. The two heuristic outputs in that list -- `path outside working dir` and `path traversal detected` -- routinely false-positive on benign constructs (e.g. `2>/dev/null`, quoted `..` arguments to sed/find, absolute paths inside inline scripts), so legitimate workspace commands silently kill the user's turn (#3599) and the agent never gets a chance to retry with a different approach (#3605). Two changes, both narrowly scoped: - `ExecTool._guard_command` now skips a small allow-list of kernel device files (`/dev/null`, the standard streams, `/dev/random`, `/dev/fd/N`, ...) before the workspace path check, matched against the pre-resolve string so symlinks like `/dev/stderr -> /proc/self/fd/2` still hit the allow-list. Real outside writes such as `> /etc/issue` remain blocked. - `AgentRunner._WORKSPACE_BLOCK_MARKERS` keeps only the four hard path-resolution errors from filesystem.py / shell.py and the SSRF marker. The two heuristic substrings move out of the fatal list, so the LLM sees them as ordinary tool errors and can self-correct in the next iteration. SSRF stays fatal because retrying an internal URL with a different phrasing would defeat the safety boundary. Tests: - `tests/tools/test_exec_security.py`: parametrized regression for the exact #3599 command sample plus other stdio redirects and device reads; explicit negative case asserts `> /etc/issue` is still blocked. - `tests/agent/test_runner.py`: `_is_workspace_violation` no longer fatals on the two heuristic markers, plus an end-to-end case proving the runner hands the guard error back to the LLM and finalizes the next turn cleanly.
This commit is contained in:
parent
9a9e446f3f
commit
7742f8fbdc
@ -831,14 +831,30 @@ class AgentRunner:
|
|||||||
detail = detail[:120] + "..."
|
detail = detail[:120] + "..."
|
||||||
return result, {"name": tool_call.name, "status": "ok", "detail": detail}, None
|
return result, {"name": tool_call.name, "status": "ok", "detail": detail}, None
|
||||||
|
|
||||||
# Markers identifying tool results that represent a workspace / safety boundary rejection.
|
# Markers identifying tool results that represent a *hard* workspace /
|
||||||
|
# safety boundary rejection -- only these abort the agent loop.
|
||||||
|
#
|
||||||
|
# We deliberately keep this list narrow (#3599 / #3605):
|
||||||
|
# - The first four come from explicit path-resolution checks in
|
||||||
|
# ``filesystem.py`` and ``shell.py`` that cannot false-positive on user
|
||||||
|
# payloads -- if you see them, the LLM truly tried to escape the
|
||||||
|
# workspace.
|
||||||
|
# - "internal/private url detected" stays fatal because SSRF is a real
|
||||||
|
# security boundary; allowing the LLM to "retry" would just let it
|
||||||
|
# poke internal infra with a different URL phrasing.
|
||||||
|
# - "path traversal detected" and "path outside working dir" are
|
||||||
|
# intentionally *not* listed: both come from the heuristic
|
||||||
|
# ``_guard_command`` checks in ``shell.py`` which scan the raw command
|
||||||
|
# string and routinely false-positive on legitimate constructs (e.g.
|
||||||
|
# ``2>/dev/null`` redirects, quoted ``..`` arguments to ``sed`` /
|
||||||
|
# ``find``, paths inside inline scripts). Treating them as fatal
|
||||||
|
# silently kills user turns (#3599) and prevents the agent from
|
||||||
|
# self-correcting by trying a different approach (#3605).
|
||||||
_WORKSPACE_BLOCK_MARKERS: tuple[str, ...] = (
|
_WORKSPACE_BLOCK_MARKERS: tuple[str, ...] = (
|
||||||
"outside the configured workspace",
|
"outside the configured workspace",
|
||||||
"outside allowed directory",
|
"outside allowed directory",
|
||||||
"working_dir is outside",
|
"working_dir is outside",
|
||||||
"working_dir could not be resolved",
|
"working_dir could not be resolved",
|
||||||
"path traversal detected",
|
|
||||||
"path outside working dir",
|
|
||||||
"internal/private url detected",
|
"internal/private url detected",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -83,6 +83,22 @@ class ExecTool(Tool):
|
|||||||
_MAX_TIMEOUT = 600
|
_MAX_TIMEOUT = 600
|
||||||
_MAX_OUTPUT = 10_000
|
_MAX_OUTPUT = 10_000
|
||||||
|
|
||||||
|
# Kernel device files that are universally safe as stdio redirect targets
|
||||||
|
# (e.g. ``cmd 2>/dev/null``). Without this allow-list the workspace guard
|
||||||
|
# treats them as ``path outside working dir`` and the LLM ends up unable
|
||||||
|
# to silence stderr inside the workspace (#3599).
|
||||||
|
_BENIGN_DEVICE_PATHS: frozenset[str] = frozenset({
|
||||||
|
"/dev/null",
|
||||||
|
"/dev/zero",
|
||||||
|
"/dev/full",
|
||||||
|
"/dev/random",
|
||||||
|
"/dev/urandom",
|
||||||
|
"/dev/stdin",
|
||||||
|
"/dev/stdout",
|
||||||
|
"/dev/stderr",
|
||||||
|
"/dev/tty",
|
||||||
|
})
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def description(self) -> str:
|
def description(self) -> str:
|
||||||
return (
|
return (
|
||||||
@ -300,10 +316,18 @@ class ExecTool(Tool):
|
|||||||
for raw in self._extract_absolute_paths(cmd):
|
for raw in self._extract_absolute_paths(cmd):
|
||||||
try:
|
try:
|
||||||
expanded = os.path.expandvars(raw.strip())
|
expanded = os.path.expandvars(raw.strip())
|
||||||
|
# Match against the un-resolved path first. On Linux,
|
||||||
|
# /dev/stderr is a symlink to /proc/self/fd/2 and
|
||||||
|
# ``Path.resolve()`` would mask the device-file intent.
|
||||||
|
if self._is_benign_device_path(expanded):
|
||||||
|
continue
|
||||||
p = Path(expanded).expanduser().resolve()
|
p = Path(expanded).expanduser().resolve()
|
||||||
except Exception:
|
except Exception:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if self._is_benign_device_path(str(p)):
|
||||||
|
continue
|
||||||
|
|
||||||
media_path = get_media_dir().resolve()
|
media_path = get_media_dir().resolve()
|
||||||
if (p.is_absolute()
|
if (p.is_absolute()
|
||||||
and cwd_path not in p.parents
|
and cwd_path not in p.parents
|
||||||
@ -315,6 +339,21 @@ class ExecTool(Tool):
|
|||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _is_benign_device_path(cls, path: str) -> bool:
|
||||||
|
"""Return True when *path* is a kernel device file we should never block.
|
||||||
|
|
||||||
|
Treats ``/dev/null``, the standard streams, ``/dev/random``, etc. as
|
||||||
|
always-safe targets so that idiomatic stdio plumbing such as
|
||||||
|
``cmd 2>/dev/null`` or ``echo done >/dev/stderr`` is not flagged as a
|
||||||
|
workspace violation regardless of the configured working directory.
|
||||||
|
Also accepts ``/dev/fd/N`` because those are per-process aliases for
|
||||||
|
already-open file descriptors and never escape the workspace.
|
||||||
|
"""
|
||||||
|
if path in cls._BENIGN_DEVICE_PATHS:
|
||||||
|
return True
|
||||||
|
return path.startswith("/dev/fd/")
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_absolute_paths(command: str) -> list[str]:
|
def _extract_absolute_paths(command: str) -> list[str]:
|
||||||
# Windows: match drive-root paths like `C:\` as well as `C:\path\to\file`
|
# Windows: match drive-root paths like `C:\` as well as `C:\path\to\file`
|
||||||
|
|||||||
@ -373,6 +373,79 @@ def test_is_workspace_violation_recognizes_ssrf_block():
|
|||||||
) is False
|
) is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_is_workspace_violation_does_not_fatal_on_shell_guard_heuristics():
|
||||||
|
"""#3599 / #3605 regression: shell guard heuristics must NOT be fatal.
|
||||||
|
|
||||||
|
``path outside working dir`` and ``path traversal detected`` are produced
|
||||||
|
by best-effort string scans inside ``ExecTool._guard_command`` -- they
|
||||||
|
routinely false-positive on idiomatic constructs (``2>/dev/null``,
|
||||||
|
``sed 's|x|../y|g'``) and should be surfaced to the LLM as recoverable
|
||||||
|
tool errors so it can switch tactics, not abort the whole turn.
|
||||||
|
"""
|
||||||
|
from nanobot.agent.runner import AgentRunner
|
||||||
|
|
||||||
|
assert AgentRunner._is_workspace_violation(
|
||||||
|
"Error: Command blocked by safety guard (path outside working dir)"
|
||||||
|
) is False
|
||||||
|
assert AgentRunner._is_workspace_violation(
|
||||||
|
"Error: Command blocked by safety guard (path traversal detected)"
|
||||||
|
) is False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_runner_lets_llm_recover_from_shell_guard_path_outside():
|
||||||
|
"""End-to-end: a guard-blocked exec is a soft tool error, not a turn-fatal.
|
||||||
|
|
||||||
|
Reporter scenario: a previous PR turned ``path outside working dir`` into
|
||||||
|
a turn-fatal RuntimeError, so when the false-positive guard fired the
|
||||||
|
user got no further iterations and (depending on channel) a silent hang.
|
||||||
|
After narrowing the marker list, the runner must hand the error back to
|
||||||
|
the LLM and let the next iteration succeed normally.
|
||||||
|
"""
|
||||||
|
from nanobot.agent.runner import AgentRunSpec, AgentRunner
|
||||||
|
|
||||||
|
provider = MagicMock()
|
||||||
|
captured_second_call: list[dict] = []
|
||||||
|
|
||||||
|
async def chat_with_retry(*, messages, **kwargs):
|
||||||
|
if provider.chat_with_retry.await_count == 1:
|
||||||
|
return LLMResponse(
|
||||||
|
content="trying noisy cleanup",
|
||||||
|
tool_calls=[ToolCallRequest(
|
||||||
|
id="call_blocked",
|
||||||
|
name="exec",
|
||||||
|
arguments={"command": "rm scratch.txt 2>/dev/null"},
|
||||||
|
)],
|
||||||
|
)
|
||||||
|
captured_second_call[:] = list(messages)
|
||||||
|
return LLMResponse(content="recovered final answer", tool_calls=[])
|
||||||
|
|
||||||
|
provider.chat_with_retry = AsyncMock(side_effect=chat_with_retry)
|
||||||
|
tools = MagicMock()
|
||||||
|
tools.get_definitions.return_value = []
|
||||||
|
tools.execute = AsyncMock(
|
||||||
|
return_value="Error: Command blocked by safety guard (path outside working dir)"
|
||||||
|
)
|
||||||
|
|
||||||
|
runner = AgentRunner(provider)
|
||||||
|
result = await runner.run(AgentRunSpec(
|
||||||
|
initial_messages=[],
|
||||||
|
tools=tools,
|
||||||
|
model="test-model",
|
||||||
|
max_iterations=3,
|
||||||
|
max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
|
||||||
|
))
|
||||||
|
|
||||||
|
assert provider.chat_with_retry.await_count == 2, (
|
||||||
|
"guard hit must NOT short-circuit the loop -- LLM should get a second turn"
|
||||||
|
)
|
||||||
|
assert result.stop_reason != "tool_error"
|
||||||
|
assert result.error is None
|
||||||
|
assert result.final_content == "recovered final answer"
|
||||||
|
assert result.tool_events and result.tool_events[0]["status"] == "error"
|
||||||
|
assert "workspace_violation" not in result.tool_events[0]["detail"]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_runner_persists_large_tool_results_for_follow_up_calls(tmp_path):
|
async def test_runner_persists_large_tool_results_for_follow_up_calls(tmp_path):
|
||||||
from nanobot.agent.runner import AgentRunSpec, AgentRunner
|
from nanobot.agent.runner import AgentRunSpec, AgentRunner
|
||||||
|
|||||||
@ -182,3 +182,62 @@ async def test_exec_ignores_workspace_check_when_not_restricted(tmp_path):
|
|||||||
result = await tool.execute(command="echo ok", working_dir=str(other))
|
result = await tool.execute(command="echo ok", working_dir=str(other))
|
||||||
assert "ok" in result
|
assert "ok" in result
|
||||||
assert "outside the configured workspace" not in result
|
assert "outside the configured workspace" not in result
|
||||||
|
|
||||||
|
|
||||||
|
# --- #3599: stdio redirects to /dev/null must not trip the workspace guard ----
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"command",
|
||||||
|
[
|
||||||
|
# The exact command from the #3599 reporter.
|
||||||
|
'rm test_print.txt 2>/dev/null; echo "done"',
|
||||||
|
# Plain redirect of stdout / stderr.
|
||||||
|
"find . -type f >/dev/null",
|
||||||
|
"noisy_cmd 2>/dev/null",
|
||||||
|
"noisy_cmd >/dev/null 2>&1",
|
||||||
|
# Read from /dev/urandom is also a benign device read.
|
||||||
|
"head -c 16 /dev/urandom | xxd",
|
||||||
|
"echo done >/dev/stderr",
|
||||||
|
"echo line </dev/stdin",
|
||||||
|
# Per-process FD aliases never escape the workspace.
|
||||||
|
"cat /dev/fd/3",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_exec_allows_benign_device_targets_inside_workspace(tmp_path, command):
|
||||||
|
workspace = tmp_path / "workspace"
|
||||||
|
workspace.mkdir()
|
||||||
|
tool = ExecTool(working_dir=str(workspace), restrict_to_workspace=True)
|
||||||
|
assert tool._guard_command(command, str(workspace)) is None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_exec_3599_regression_rm_with_dev_null_redirect(tmp_path):
|
||||||
|
"""#3599: ``rm <ws-path> 2>/dev/null`` must succeed against the workspace guard."""
|
||||||
|
workspace = tmp_path / "workspace"
|
||||||
|
workspace.mkdir()
|
||||||
|
target = workspace / "test_print.txt"
|
||||||
|
target.write_text("scratch")
|
||||||
|
tool = ExecTool(working_dir=str(workspace), restrict_to_workspace=True, timeout=5)
|
||||||
|
result = await tool.execute(
|
||||||
|
command=f'rm {target} 2>/dev/null; echo "done"',
|
||||||
|
working_dir=str(workspace),
|
||||||
|
)
|
||||||
|
assert "done" in result
|
||||||
|
assert "path outside working dir" not in result
|
||||||
|
assert not target.exists()
|
||||||
|
|
||||||
|
|
||||||
|
def test_exec_still_blocks_real_outside_path_via_redirect(tmp_path):
|
||||||
|
"""Redirect *targets* outside the workspace (not /dev/...) must still be blocked.
|
||||||
|
|
||||||
|
We only whitelist kernel device files; arbitrary outside redirects such as
|
||||||
|
``> /etc/issue`` should remain caught by the workspace guard so a buggy
|
||||||
|
LLM cannot exfiltrate data outside the workspace via stderr redirection.
|
||||||
|
"""
|
||||||
|
workspace = tmp_path / "workspace"
|
||||||
|
workspace.mkdir()
|
||||||
|
tool = ExecTool(working_dir=str(workspace), restrict_to_workspace=True)
|
||||||
|
blocked = tool._guard_command("echo pwn > /etc/issue", str(workspace))
|
||||||
|
assert blocked is not None
|
||||||
|
assert "path outside working dir" in blocked
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user