diff --git a/nanobot/agent/runner.py b/nanobot/agent/runner.py index 3d941f382..50b86de43 100644 --- a/nanobot/agent/runner.py +++ b/nanobot/agent/runner.py @@ -831,14 +831,30 @@ class AgentRunner: detail = detail[:120] + "..." return result, {"name": tool_call.name, "status": "ok", "detail": detail}, None - # Markers identifying tool results that represent a workspace / safety boundary rejection. + # Markers identifying tool results that represent a *hard* workspace / + # safety boundary rejection -- only these abort the agent loop. + # + # We deliberately keep this list narrow (#3599 / #3605): + # - The first four come from explicit path-resolution checks in + # ``filesystem.py`` and ``shell.py`` that cannot false-positive on user + # payloads -- if you see them, the LLM truly tried to escape the + # workspace. + # - "internal/private url detected" stays fatal because SSRF is a real + # security boundary; allowing the LLM to "retry" would just let it + # poke internal infra with a different URL phrasing. + # - "path traversal detected" and "path outside working dir" are + # intentionally *not* listed: both come from the heuristic + # ``_guard_command`` checks in ``shell.py`` which scan the raw command + # string and routinely false-positive on legitimate constructs (e.g. + # ``2>/dev/null`` redirects, quoted ``..`` arguments to ``sed`` / + # ``find``, paths inside inline scripts). Treating them as fatal + # silently kills user turns (#3599) and prevents the agent from + # self-correcting by trying a different approach (#3605). _WORKSPACE_BLOCK_MARKERS: tuple[str, ...] = ( "outside the configured workspace", "outside allowed directory", "working_dir is outside", "working_dir could not be resolved", - "path traversal detected", - "path outside working dir", "internal/private url detected", ) diff --git a/nanobot/agent/tools/shell.py b/nanobot/agent/tools/shell.py index b7f841a5c..177176d25 100644 --- a/nanobot/agent/tools/shell.py +++ b/nanobot/agent/tools/shell.py @@ -83,6 +83,22 @@ class ExecTool(Tool): _MAX_TIMEOUT = 600 _MAX_OUTPUT = 10_000 + # Kernel device files that are universally safe as stdio redirect targets + # (e.g. ``cmd 2>/dev/null``). Without this allow-list the workspace guard + # treats them as ``path outside working dir`` and the LLM ends up unable + # to silence stderr inside the workspace (#3599). + _BENIGN_DEVICE_PATHS: frozenset[str] = frozenset({ + "/dev/null", + "/dev/zero", + "/dev/full", + "/dev/random", + "/dev/urandom", + "/dev/stdin", + "/dev/stdout", + "/dev/stderr", + "/dev/tty", + }) + @property def description(self) -> str: return ( @@ -300,10 +316,18 @@ class ExecTool(Tool): for raw in self._extract_absolute_paths(cmd): try: expanded = os.path.expandvars(raw.strip()) + # Match against the un-resolved path first. On Linux, + # /dev/stderr is a symlink to /proc/self/fd/2 and + # ``Path.resolve()`` would mask the device-file intent. + if self._is_benign_device_path(expanded): + continue p = Path(expanded).expanduser().resolve() except Exception: continue + if self._is_benign_device_path(str(p)): + continue + media_path = get_media_dir().resolve() if (p.is_absolute() and cwd_path not in p.parents @@ -315,6 +339,21 @@ class ExecTool(Tool): return None + @classmethod + def _is_benign_device_path(cls, path: str) -> bool: + """Return True when *path* is a kernel device file we should never block. + + Treats ``/dev/null``, the standard streams, ``/dev/random``, etc. as + always-safe targets so that idiomatic stdio plumbing such as + ``cmd 2>/dev/null`` or ``echo done >/dev/stderr`` is not flagged as a + workspace violation regardless of the configured working directory. + Also accepts ``/dev/fd/N`` because those are per-process aliases for + already-open file descriptors and never escape the workspace. + """ + if path in cls._BENIGN_DEVICE_PATHS: + return True + return path.startswith("/dev/fd/") + @staticmethod def _extract_absolute_paths(command: str) -> list[str]: # Windows: match drive-root paths like `C:\` as well as `C:\path\to\file` diff --git a/tests/agent/test_runner.py b/tests/agent/test_runner.py index aa558b4ff..86bb8f1bf 100644 --- a/tests/agent/test_runner.py +++ b/tests/agent/test_runner.py @@ -373,6 +373,79 @@ def test_is_workspace_violation_recognizes_ssrf_block(): ) is False +def test_is_workspace_violation_does_not_fatal_on_shell_guard_heuristics(): + """#3599 / #3605 regression: shell guard heuristics must NOT be fatal. + + ``path outside working dir`` and ``path traversal detected`` are produced + by best-effort string scans inside ``ExecTool._guard_command`` -- they + routinely false-positive on idiomatic constructs (``2>/dev/null``, + ``sed 's|x|../y|g'``) and should be surfaced to the LLM as recoverable + tool errors so it can switch tactics, not abort the whole turn. + """ + from nanobot.agent.runner import AgentRunner + + assert AgentRunner._is_workspace_violation( + "Error: Command blocked by safety guard (path outside working dir)" + ) is False + assert AgentRunner._is_workspace_violation( + "Error: Command blocked by safety guard (path traversal detected)" + ) is False + + +@pytest.mark.asyncio +async def test_runner_lets_llm_recover_from_shell_guard_path_outside(): + """End-to-end: a guard-blocked exec is a soft tool error, not a turn-fatal. + + Reporter scenario: a previous PR turned ``path outside working dir`` into + a turn-fatal RuntimeError, so when the false-positive guard fired the + user got no further iterations and (depending on channel) a silent hang. + After narrowing the marker list, the runner must hand the error back to + the LLM and let the next iteration succeed normally. + """ + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock() + captured_second_call: list[dict] = [] + + async def chat_with_retry(*, messages, **kwargs): + if provider.chat_with_retry.await_count == 1: + return LLMResponse( + content="trying noisy cleanup", + tool_calls=[ToolCallRequest( + id="call_blocked", + name="exec", + arguments={"command": "rm scratch.txt 2>/dev/null"}, + )], + ) + captured_second_call[:] = list(messages) + return LLMResponse(content="recovered final answer", tool_calls=[]) + + provider.chat_with_retry = AsyncMock(side_effect=chat_with_retry) + tools = MagicMock() + tools.get_definitions.return_value = [] + tools.execute = AsyncMock( + return_value="Error: Command blocked by safety guard (path outside working dir)" + ) + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[], + tools=tools, + model="test-model", + max_iterations=3, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + )) + + assert provider.chat_with_retry.await_count == 2, ( + "guard hit must NOT short-circuit the loop -- LLM should get a second turn" + ) + assert result.stop_reason != "tool_error" + assert result.error is None + assert result.final_content == "recovered final answer" + assert result.tool_events and result.tool_events[0]["status"] == "error" + assert "workspace_violation" not in result.tool_events[0]["detail"] + + @pytest.mark.asyncio async def test_runner_persists_large_tool_results_for_follow_up_calls(tmp_path): from nanobot.agent.runner import AgentRunSpec, AgentRunner diff --git a/tests/tools/test_exec_security.py b/tests/tools/test_exec_security.py index 64dc49563..b7ccf6a2b 100644 --- a/tests/tools/test_exec_security.py +++ b/tests/tools/test_exec_security.py @@ -182,3 +182,62 @@ async def test_exec_ignores_workspace_check_when_not_restricted(tmp_path): result = await tool.execute(command="echo ok", working_dir=str(other)) assert "ok" in result assert "outside the configured workspace" not in result + + +# --- #3599: stdio redirects to /dev/null must not trip the workspace guard ---- + + +@pytest.mark.parametrize( + "command", + [ + # The exact command from the #3599 reporter. + 'rm test_print.txt 2>/dev/null; echo "done"', + # Plain redirect of stdout / stderr. + "find . -type f >/dev/null", + "noisy_cmd 2>/dev/null", + "noisy_cmd >/dev/null 2>&1", + # Read from /dev/urandom is also a benign device read. + "head -c 16 /dev/urandom | xxd", + "echo done >/dev/stderr", + "echo line 2>/dev/null`` must succeed against the workspace guard.""" + workspace = tmp_path / "workspace" + workspace.mkdir() + target = workspace / "test_print.txt" + target.write_text("scratch") + tool = ExecTool(working_dir=str(workspace), restrict_to_workspace=True, timeout=5) + result = await tool.execute( + command=f'rm {target} 2>/dev/null; echo "done"', + working_dir=str(workspace), + ) + assert "done" in result + assert "path outside working dir" not in result + assert not target.exists() + + +def test_exec_still_blocks_real_outside_path_via_redirect(tmp_path): + """Redirect *targets* outside the workspace (not /dev/...) must still be blocked. + + We only whitelist kernel device files; arbitrary outside redirects such as + ``> /etc/issue`` should remain caught by the workspace guard so a buggy + LLM cannot exfiltrate data outside the workspace via stderr redirection. + """ + workspace = tmp_path / "workspace" + workspace.mkdir() + tool = ExecTool(working_dir=str(workspace), restrict_to_workspace=True) + blocked = tool._guard_command("echo pwn > /etc/issue", str(workspace)) + assert blocked is not None + assert "path outside working dir" in blocked