mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-07 02:05:51 +00:00
PR #3493 promoted every shell `_guard_command` rejection to a turn-fatal RuntimeError. The two heuristic outputs in that list -- `path outside working dir` and `path traversal detected` -- routinely false-positive on benign constructs (e.g. `2>/dev/null`, quoted `..` arguments to sed/find, absolute paths inside inline scripts), so legitimate workspace commands silently kill the user's turn (#3599) and the agent never gets a chance to retry with a different approach (#3605). Two changes, both narrowly scoped: - `ExecTool._guard_command` now skips a small allow-list of kernel device files (`/dev/null`, the standard streams, `/dev/random`, `/dev/fd/N`, ...) before the workspace path check, matched against the pre-resolve string so symlinks like `/dev/stderr -> /proc/self/fd/2` still hit the allow-list. Real outside writes such as `> /etc/issue` remain blocked. - `AgentRunner._WORKSPACE_BLOCK_MARKERS` keeps only the four hard path-resolution errors from filesystem.py / shell.py and the SSRF marker. The two heuristic substrings move out of the fatal list, so the LLM sees them as ordinary tool errors and can self-correct in the next iteration. SSRF stays fatal because retrying an internal URL with a different phrasing would defeat the safety boundary. Tests: - `tests/tools/test_exec_security.py`: parametrized regression for the exact #3599 command sample plus other stdio redirects and device reads; explicit negative case asserts `> /etc/issue` is still blocked. - `tests/agent/test_runner.py`: `_is_workspace_violation` no longer fatals on the two heuristic markers, plus an end-to-end case proving the runner hands the guard error back to the LLM and finalizes the next turn cleanly.
This commit is contained in:
parent
9a9e446f3f
commit
7742f8fbdc
@ -831,14 +831,30 @@ class AgentRunner:
|
||||
detail = detail[:120] + "..."
|
||||
return result, {"name": tool_call.name, "status": "ok", "detail": detail}, None
|
||||
|
||||
# Markers identifying tool results that represent a workspace / safety boundary rejection.
|
||||
# Markers identifying tool results that represent a *hard* workspace /
|
||||
# safety boundary rejection -- only these abort the agent loop.
|
||||
#
|
||||
# We deliberately keep this list narrow (#3599 / #3605):
|
||||
# - The first four come from explicit path-resolution checks in
|
||||
# ``filesystem.py`` and ``shell.py`` that cannot false-positive on user
|
||||
# payloads -- if you see them, the LLM truly tried to escape the
|
||||
# workspace.
|
||||
# - "internal/private url detected" stays fatal because SSRF is a real
|
||||
# security boundary; allowing the LLM to "retry" would just let it
|
||||
# poke internal infra with a different URL phrasing.
|
||||
# - "path traversal detected" and "path outside working dir" are
|
||||
# intentionally *not* listed: both come from the heuristic
|
||||
# ``_guard_command`` checks in ``shell.py`` which scan the raw command
|
||||
# string and routinely false-positive on legitimate constructs (e.g.
|
||||
# ``2>/dev/null`` redirects, quoted ``..`` arguments to ``sed`` /
|
||||
# ``find``, paths inside inline scripts). Treating them as fatal
|
||||
# silently kills user turns (#3599) and prevents the agent from
|
||||
# self-correcting by trying a different approach (#3605).
|
||||
_WORKSPACE_BLOCK_MARKERS: tuple[str, ...] = (
|
||||
"outside the configured workspace",
|
||||
"outside allowed directory",
|
||||
"working_dir is outside",
|
||||
"working_dir could not be resolved",
|
||||
"path traversal detected",
|
||||
"path outside working dir",
|
||||
"internal/private url detected",
|
||||
)
|
||||
|
||||
|
||||
@ -83,6 +83,22 @@ class ExecTool(Tool):
|
||||
_MAX_TIMEOUT = 600
|
||||
_MAX_OUTPUT = 10_000
|
||||
|
||||
# Kernel device files that are universally safe as stdio redirect targets
|
||||
# (e.g. ``cmd 2>/dev/null``). Without this allow-list the workspace guard
|
||||
# treats them as ``path outside working dir`` and the LLM ends up unable
|
||||
# to silence stderr inside the workspace (#3599).
|
||||
_BENIGN_DEVICE_PATHS: frozenset[str] = frozenset({
|
||||
"/dev/null",
|
||||
"/dev/zero",
|
||||
"/dev/full",
|
||||
"/dev/random",
|
||||
"/dev/urandom",
|
||||
"/dev/stdin",
|
||||
"/dev/stdout",
|
||||
"/dev/stderr",
|
||||
"/dev/tty",
|
||||
})
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
return (
|
||||
@ -300,10 +316,18 @@ class ExecTool(Tool):
|
||||
for raw in self._extract_absolute_paths(cmd):
|
||||
try:
|
||||
expanded = os.path.expandvars(raw.strip())
|
||||
# Match against the un-resolved path first. On Linux,
|
||||
# /dev/stderr is a symlink to /proc/self/fd/2 and
|
||||
# ``Path.resolve()`` would mask the device-file intent.
|
||||
if self._is_benign_device_path(expanded):
|
||||
continue
|
||||
p = Path(expanded).expanduser().resolve()
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
if self._is_benign_device_path(str(p)):
|
||||
continue
|
||||
|
||||
media_path = get_media_dir().resolve()
|
||||
if (p.is_absolute()
|
||||
and cwd_path not in p.parents
|
||||
@ -315,6 +339,21 @@ class ExecTool(Tool):
|
||||
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def _is_benign_device_path(cls, path: str) -> bool:
|
||||
"""Return True when *path* is a kernel device file we should never block.
|
||||
|
||||
Treats ``/dev/null``, the standard streams, ``/dev/random``, etc. as
|
||||
always-safe targets so that idiomatic stdio plumbing such as
|
||||
``cmd 2>/dev/null`` or ``echo done >/dev/stderr`` is not flagged as a
|
||||
workspace violation regardless of the configured working directory.
|
||||
Also accepts ``/dev/fd/N`` because those are per-process aliases for
|
||||
already-open file descriptors and never escape the workspace.
|
||||
"""
|
||||
if path in cls._BENIGN_DEVICE_PATHS:
|
||||
return True
|
||||
return path.startswith("/dev/fd/")
|
||||
|
||||
@staticmethod
|
||||
def _extract_absolute_paths(command: str) -> list[str]:
|
||||
# Windows: match drive-root paths like `C:\` as well as `C:\path\to\file`
|
||||
|
||||
@ -373,6 +373,79 @@ def test_is_workspace_violation_recognizes_ssrf_block():
|
||||
) is False
|
||||
|
||||
|
||||
def test_is_workspace_violation_does_not_fatal_on_shell_guard_heuristics():
|
||||
"""#3599 / #3605 regression: shell guard heuristics must NOT be fatal.
|
||||
|
||||
``path outside working dir`` and ``path traversal detected`` are produced
|
||||
by best-effort string scans inside ``ExecTool._guard_command`` -- they
|
||||
routinely false-positive on idiomatic constructs (``2>/dev/null``,
|
||||
``sed 's|x|../y|g'``) and should be surfaced to the LLM as recoverable
|
||||
tool errors so it can switch tactics, not abort the whole turn.
|
||||
"""
|
||||
from nanobot.agent.runner import AgentRunner
|
||||
|
||||
assert AgentRunner._is_workspace_violation(
|
||||
"Error: Command blocked by safety guard (path outside working dir)"
|
||||
) is False
|
||||
assert AgentRunner._is_workspace_violation(
|
||||
"Error: Command blocked by safety guard (path traversal detected)"
|
||||
) is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_runner_lets_llm_recover_from_shell_guard_path_outside():
|
||||
"""End-to-end: a guard-blocked exec is a soft tool error, not a turn-fatal.
|
||||
|
||||
Reporter scenario: a previous PR turned ``path outside working dir`` into
|
||||
a turn-fatal RuntimeError, so when the false-positive guard fired the
|
||||
user got no further iterations and (depending on channel) a silent hang.
|
||||
After narrowing the marker list, the runner must hand the error back to
|
||||
the LLM and let the next iteration succeed normally.
|
||||
"""
|
||||
from nanobot.agent.runner import AgentRunSpec, AgentRunner
|
||||
|
||||
provider = MagicMock()
|
||||
captured_second_call: list[dict] = []
|
||||
|
||||
async def chat_with_retry(*, messages, **kwargs):
|
||||
if provider.chat_with_retry.await_count == 1:
|
||||
return LLMResponse(
|
||||
content="trying noisy cleanup",
|
||||
tool_calls=[ToolCallRequest(
|
||||
id="call_blocked",
|
||||
name="exec",
|
||||
arguments={"command": "rm scratch.txt 2>/dev/null"},
|
||||
)],
|
||||
)
|
||||
captured_second_call[:] = list(messages)
|
||||
return LLMResponse(content="recovered final answer", tool_calls=[])
|
||||
|
||||
provider.chat_with_retry = AsyncMock(side_effect=chat_with_retry)
|
||||
tools = MagicMock()
|
||||
tools.get_definitions.return_value = []
|
||||
tools.execute = AsyncMock(
|
||||
return_value="Error: Command blocked by safety guard (path outside working dir)"
|
||||
)
|
||||
|
||||
runner = AgentRunner(provider)
|
||||
result = await runner.run(AgentRunSpec(
|
||||
initial_messages=[],
|
||||
tools=tools,
|
||||
model="test-model",
|
||||
max_iterations=3,
|
||||
max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
|
||||
))
|
||||
|
||||
assert provider.chat_with_retry.await_count == 2, (
|
||||
"guard hit must NOT short-circuit the loop -- LLM should get a second turn"
|
||||
)
|
||||
assert result.stop_reason != "tool_error"
|
||||
assert result.error is None
|
||||
assert result.final_content == "recovered final answer"
|
||||
assert result.tool_events and result.tool_events[0]["status"] == "error"
|
||||
assert "workspace_violation" not in result.tool_events[0]["detail"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_runner_persists_large_tool_results_for_follow_up_calls(tmp_path):
|
||||
from nanobot.agent.runner import AgentRunSpec, AgentRunner
|
||||
|
||||
@ -182,3 +182,62 @@ async def test_exec_ignores_workspace_check_when_not_restricted(tmp_path):
|
||||
result = await tool.execute(command="echo ok", working_dir=str(other))
|
||||
assert "ok" in result
|
||||
assert "outside the configured workspace" not in result
|
||||
|
||||
|
||||
# --- #3599: stdio redirects to /dev/null must not trip the workspace guard ----
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"command",
|
||||
[
|
||||
# The exact command from the #3599 reporter.
|
||||
'rm test_print.txt 2>/dev/null; echo "done"',
|
||||
# Plain redirect of stdout / stderr.
|
||||
"find . -type f >/dev/null",
|
||||
"noisy_cmd 2>/dev/null",
|
||||
"noisy_cmd >/dev/null 2>&1",
|
||||
# Read from /dev/urandom is also a benign device read.
|
||||
"head -c 16 /dev/urandom | xxd",
|
||||
"echo done >/dev/stderr",
|
||||
"echo line </dev/stdin",
|
||||
# Per-process FD aliases never escape the workspace.
|
||||
"cat /dev/fd/3",
|
||||
],
|
||||
)
|
||||
def test_exec_allows_benign_device_targets_inside_workspace(tmp_path, command):
|
||||
workspace = tmp_path / "workspace"
|
||||
workspace.mkdir()
|
||||
tool = ExecTool(working_dir=str(workspace), restrict_to_workspace=True)
|
||||
assert tool._guard_command(command, str(workspace)) is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_exec_3599_regression_rm_with_dev_null_redirect(tmp_path):
|
||||
"""#3599: ``rm <ws-path> 2>/dev/null`` must succeed against the workspace guard."""
|
||||
workspace = tmp_path / "workspace"
|
||||
workspace.mkdir()
|
||||
target = workspace / "test_print.txt"
|
||||
target.write_text("scratch")
|
||||
tool = ExecTool(working_dir=str(workspace), restrict_to_workspace=True, timeout=5)
|
||||
result = await tool.execute(
|
||||
command=f'rm {target} 2>/dev/null; echo "done"',
|
||||
working_dir=str(workspace),
|
||||
)
|
||||
assert "done" in result
|
||||
assert "path outside working dir" not in result
|
||||
assert not target.exists()
|
||||
|
||||
|
||||
def test_exec_still_blocks_real_outside_path_via_redirect(tmp_path):
|
||||
"""Redirect *targets* outside the workspace (not /dev/...) must still be blocked.
|
||||
|
||||
We only whitelist kernel device files; arbitrary outside redirects such as
|
||||
``> /etc/issue`` should remain caught by the workspace guard so a buggy
|
||||
LLM cannot exfiltrate data outside the workspace via stderr redirection.
|
||||
"""
|
||||
workspace = tmp_path / "workspace"
|
||||
workspace.mkdir()
|
||||
tool = ExecTool(working_dir=str(workspace), restrict_to_workspace=True)
|
||||
blocked = tool._guard_command("echo pwn > /etc/issue", str(workspace))
|
||||
assert blocked is not None
|
||||
assert "path outside working dir" in blocked
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user