diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index 9a5eb65a9..6a90349a0 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -257,7 +257,8 @@ class AgentLoop: self._runtime_vars: dict[str, Any] = {} self._unregistered_tools: dict[str, Tool] = {} self._config_snapshots: dict[str, dict[str, Any]] = {} - self._backup_critical_tools() + if self_evolution: + self._backup_critical_tools() self.commands = CommandRouter() register_builtin_commands(self.commands) diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py index 571bcc792..b949335ca 100644 --- a/nanobot/agent/subagent.py +++ b/nanobot/agent/subagent.py @@ -2,7 +2,9 @@ import asyncio import json +import time import uuid +from dataclasses import dataclass, field from pathlib import Path from typing import Any @@ -23,12 +25,29 @@ from nanobot.config.schema import ExecToolConfig, WebToolsConfig from nanobot.providers.base import LLMProvider -class _SubagentHook(AgentHook): - """Logging-only hook for subagent execution.""" +@dataclass(slots=True) +class SubagentStatus: + """Real-time status of a running subagent.""" - def __init__(self, task_id: str) -> None: + task_id: str + label: str + task_description: str + started_at: float # time.monotonic() + phase: str = "initializing" # initializing | awaiting_tools | tools_completed | final_response | done | error + iteration: int = 0 + tool_events: list = field(default_factory=list) # [{name, status, detail}, ...] + usage: dict = field(default_factory=dict) # token usage + stop_reason: str | None = None + error: str | None = None + + +class _SubagentHook(AgentHook): + """Hook for subagent execution — logs tool calls and updates status.""" + + def __init__(self, task_id: str, status: SubagentStatus | None = None) -> None: super().__init__() self._task_id = task_id + self._status = status async def before_execute_tools(self, context: AgentHookContext) -> None: for tool_call in context.tool_calls: @@ -38,6 +57,15 @@ class _SubagentHook(AgentHook): self._task_id, tool_call.name, args_str, ) + async def after_iteration(self, context: AgentHookContext) -> None: + if self._status is None: + return + self._status.iteration = context.iteration + self._status.tool_events = list(context.tool_events) + self._status.usage = dict(context.usage) + if context.error: + self._status.error = str(context.error) + class SubagentManager: """Manages background subagent execution.""" @@ -67,6 +95,7 @@ class SubagentManager: self.disabled_skills = set(disabled_skills or []) self.runner = AgentRunner(provider) self._running_tasks: dict[str, asyncio.Task[None]] = {} + self._task_statuses: dict[str, SubagentStatus] = {} self._session_tasks: dict[str, set[str]] = {} # session_key -> {task_id, ...} async def spawn( @@ -82,8 +111,16 @@ class SubagentManager: display_label = label or task[:30] + ("..." if len(task) > 30 else "") origin = {"channel": origin_channel, "chat_id": origin_chat_id} + status = SubagentStatus( + task_id=task_id, + label=display_label, + task_description=task, + started_at=time.monotonic(), + ) + self._task_statuses[task_id] = status + bg_task = asyncio.create_task( - self._run_subagent(task_id, task, display_label, origin) + self._run_subagent(task_id, task, display_label, origin, status) ) self._running_tasks[task_id] = bg_task if session_key: @@ -91,6 +128,7 @@ class SubagentManager: def _cleanup(_: asyncio.Task) -> None: self._running_tasks.pop(task_id, None) + self._task_statuses.pop(task_id, None) if session_key and (ids := self._session_tasks.get(session_key)): ids.discard(task_id) if not ids: @@ -107,10 +145,15 @@ class SubagentManager: task: str, label: str, origin: dict[str, str], + status: SubagentStatus, ) -> None: """Execute the subagent task and announce the result.""" logger.info("Subagent [{}] starting task: {}", task_id, label) + async def _on_checkpoint(payload: dict) -> None: + status.phase = payload.get("phase", status.phase) + status.iteration = payload.get("iteration", status.iteration) + try: # Build subagent tools (no message tool, no spawn tool) tools = ToolRegistry() @@ -145,12 +188,16 @@ class SubagentManager: model=self.model, max_iterations=15, max_tool_result_chars=self.max_tool_result_chars, - hook=_SubagentHook(task_id), + hook=_SubagentHook(task_id, status), max_iterations_message="Task completed but no final response was generated.", error_message=None, fail_on_tool_error=True, + checkpoint_callback=_on_checkpoint, )) if result.stop_reason == "tool_error": + status.phase = "done" + status.stop_reason = result.stop_reason + status.tool_events = list(result.tool_events) await self._announce_result( task_id, label, @@ -161,6 +208,8 @@ class SubagentManager: ) return if result.stop_reason == "error": + status.phase = "done" + status.stop_reason = result.stop_reason await self._announce_result( task_id, label, @@ -173,10 +222,14 @@ class SubagentManager: final_result = result.final_content or "Task completed but no final response was generated." logger.info("Subagent [{}] completed successfully", task_id) + status.phase = "done" + status.stop_reason = result.stop_reason await self._announce_result(task_id, label, task, final_result, origin, "ok") except Exception as e: error_msg = f"Error: {str(e)}" + status.phase = "error" + status.error = str(e) logger.error("Subagent [{}] failed: {}", task_id, e) await self._announce_result(task_id, label, task, error_msg, origin, "error") diff --git a/nanobot/agent/tools/self.py b/nanobot/agent/tools/self.py index cf43f2201..c30d68b12 100644 --- a/nanobot/agent/tools/self.py +++ b/nanobot/agent/tools/self.py @@ -2,8 +2,10 @@ from __future__ import annotations +import asyncio import copy import inspect +import time from typing import TYPE_CHECKING, Any from loguru import logger @@ -56,6 +58,7 @@ class SelfTool(Tool): _MAX_VALUE_ELEMENTS = 1024 _MAX_RUNTIME_KEYS = 64 + _CALL_TIMEOUT_SECONDS = 30 def __init__(self, loop: AgentLoop) -> None: self._loop = loop @@ -160,6 +163,8 @@ class SelfTool(Tool): @staticmethod def _format_value(val: Any, key: str = "") -> str: """Format a value for inspect output.""" + from nanobot.agent.subagent import SubagentStatus + if val is None: return f"{key}: None" if key else "None" @@ -168,6 +173,36 @@ class SelfTool(Tool): text = f"{key}: {val!r}" if key else repr(val) return text + # SubagentStatus → rich single-status display (before callable check) + if isinstance(val, SubagentStatus): + elapsed = time.monotonic() - val.started_at + tool_summary = ", ".join( + f"{e.get('name', '?')}({e.get('status', '?')})" for e in val.tool_events[-5:] + ) or "none" + lines = [ + f"Subagent [{val.task_id}] '{val.label}'", + f" task: {val.task_description}", + f" phase: {val.phase}, iteration: {val.iteration}, elapsed: {elapsed:.1f}s", + f" tools: {tool_summary}", + f" usage: {val.usage or 'n/a'}", + ] + if val.error: + lines.append(f" error: {val.error}") + if val.stop_reason: + lines.append(f" stop_reason: {val.stop_reason}") + return "\n".join(lines) + + # Has _running_tasks → SubagentManager-like (before callable check) + if hasattr(val, "_running_tasks") and isinstance(getattr(val, "_running_tasks"), dict): + # Try _task_statuses first for rich display + statuses = getattr(val, "_task_statuses", None) + if statuses and isinstance(statuses, dict) and len(statuses) > 0: + return SelfTool._format_value(statuses, key) + tasks = val._running_tasks + count = len(tasks) + ids = list(tasks.keys())[:10] + return f"subagents: {count} running — {ids}" + # Callable → method hint if callable(val): name = getattr(val, "__name__", str(val)) @@ -179,8 +214,27 @@ class SelfTool(Tool): label = f"{key}." if key else "" return f"method {label}{name}{sig} — use 'call' action to invoke" - # Dict + # Dict — check for dict[str, SubagentStatus] first if isinstance(val, dict): + if val and all(isinstance(v, SubagentStatus) for v in val.values()): + prefix = f"{key}: " if key else "" + count = len(val) + if count == 0: + return f"{prefix}no running subagents" + lines = [f"{prefix}{count} subagent(s):"] + for tid, st in val.items(): + elapsed = time.monotonic() - st.started_at + tool_summary = ", ".join( + f"{e.get('name', '?')}({e.get('status', '?')})" for e in st.tool_events[-5:] + ) or "none" + lines.append( + f" [{tid}] '{st.label}'\n" + f" phase: {st.phase}, iteration: {st.iteration}, elapsed: {elapsed:.1f}s\n" + f" tools: {tool_summary}\n" + f" usage: {st.usage or 'n/a'}" + ) + return "\n".join(lines) + r = repr(val) if len(r) > 2000: r = r[:2000] + "... (truncated)" @@ -200,13 +254,6 @@ class SelfTool(Tool): names = val.tool_names return f"tools: {len(names)} registered — {names}" - # Has _running_tasks → SubagentManager-like - if hasattr(val, "_running_tasks") and isinstance(getattr(val, "_running_tasks"), dict): - tasks = val._running_tasks - count = len(tasks) - ids = list(tasks.keys())[:10] - return f"subagents: {count} running — {ids}" - # Generic object → list public attrs attrs = [a for a in dir(val) if not a.startswith("_") and not callable(getattr(val, a, None))] cls_name = type(val).__name__ @@ -307,6 +354,9 @@ class SelfTool(Tool): # Dot-path modify: resolve parent, set leaf if "." in key: parent_path, leaf = key.rsplit(".", 1) + if leaf in self._DENIED_ATTRS or leaf.startswith("__"): + self._audit("modify", f"BLOCKED leaf '{leaf}'") + return f"Error: '{leaf}' is not accessible" parent, err = self._resolve_path(parent_path) if err: return f"Error: {err}" @@ -372,27 +422,29 @@ class SelfTool(Tool): return f"Set _runtime_vars.{key} = {value!r}" @classmethod - def _validate_json_safe(cls, value: Any, depth: int = 0, elements: int = 0) -> str | None: + def _validate_json_safe(cls, value: Any, depth: int = 0, _elements: list[int] | None = None) -> str | None: + if _elements is None: + _elements = [0] if depth > 10: return "value nesting too deep (max 10 levels)" if isinstance(value, (str, int, float, bool, type(None))): return None if isinstance(value, list): - elements += len(value) - if elements > cls._MAX_VALUE_ELEMENTS: + _elements[0] += len(value) + if _elements[0] > cls._MAX_VALUE_ELEMENTS: return f"value too large (max {cls._MAX_VALUE_ELEMENTS} total elements)" for i, item in enumerate(value): - if err := cls._validate_json_safe(item, depth + 1, elements): + if err := cls._validate_json_safe(item, depth + 1, _elements): return f"list[{i}] contains {err}" return None if isinstance(value, dict): - elements += len(value) - if elements > cls._MAX_VALUE_ELEMENTS: + _elements[0] += len(value) + if _elements[0] > cls._MAX_VALUE_ELEMENTS: return f"value too large (max {cls._MAX_VALUE_ELEMENTS} total elements)" for k, v in value.items(): if not isinstance(k, str): return f"dict key must be str, got {type(k).__name__}" - if err := cls._validate_json_safe(v, depth + 1, elements): + if err := cls._validate_json_safe(v, depth + 1, _elements): return f"dict key '{k}' contains {err}" return None return f"unsupported type {type(value).__name__}" @@ -412,9 +464,12 @@ class SelfTool(Tool): result = obj(**(args or {})) # Await if coroutine if inspect.isawaitable(result): - result = await result + result = await asyncio.wait_for(result, timeout=self._CALL_TIMEOUT_SECONDS) self._audit("call", method) return repr(result) + except asyncio.TimeoutError: + self._audit("call", f"{method} TIMEOUT after {self._CALL_TIMEOUT_SECONDS}s") + return f"Error: call to '{method}' timed out after {self._CALL_TIMEOUT_SECONDS}s" except Exception as e: self._audit("call", f"{method} FAILED: {e}") return f"Error calling {method}: {e}" diff --git a/tests/agent/tools/test_self_tool.py b/tests/agent/tools/test_self_tool.py index fd20fb4bb..030ca3c99 100644 --- a/tests/agent/tools/test_self_tool.py +++ b/tests/agent/tools/test_self_tool.py @@ -3,6 +3,7 @@ from __future__ import annotations import copy +import time from pathlib import Path from unittest.mock import AsyncMock, MagicMock @@ -263,6 +264,28 @@ class TestModifyBlocked: result = await tool.execute(action="modify", key="__class__", value="evil") assert "protected" in result + @pytest.mark.asyncio + async def test_modify_dotpath_leaf_dunder_blocked(self): + """Fix 3.1: leaf segment of dot-path must also be validated.""" + tool = _make_tool() + result = await tool.execute( + action="modify", + key="provider_retry_mode.__class__", + value="evil", + ) + assert "not accessible" in result + + @pytest.mark.asyncio + async def test_modify_dotpath_leaf_denied_attr_blocked(self): + """Fix 3.1: leaf segment matching _DENIED_ATTRS must be rejected.""" + tool = _make_tool() + result = await tool.execute( + action="modify", + key="provider_retry_mode.__globals__", + value={}, + ) + assert "not accessible" in result + # --------------------------------------------------------------------------- # modify — free tier (setattr priority) @@ -370,6 +393,31 @@ class TestModifyOpen: assert "Set workspace" in result +# --------------------------------------------------------------------------- +# validate_json_safe — element counting +# --------------------------------------------------------------------------- + +class TestValidateJsonSafe: + + def test_sibling_lists_counted_correctly(self): + """Fix 3.3: sibling list elements should accumulate across recursion.""" + # Two sibling lists of 600 each = 1200 total elements, over the 1024 limit + big_value = {"a": list(range(600)), "b": list(range(600))} + err = SelfTool._validate_json_safe(big_value) + assert err is not None + assert "too large" in err + + def test_single_list_within_limit(self): + """A single list of 500 items should pass.""" + ok_value = list(range(500)) + assert SelfTool._validate_json_safe(ok_value) is None + + def test_deeply_nested_within_limit(self): + """Deeply nested structures that stay under limit should pass.""" + value = {"level1": {"level2": {"level3": list(range(100))}}} + assert SelfTool._validate_json_safe(value) is None + + # --------------------------------------------------------------------------- # call — method invocation # --------------------------------------------------------------------------- @@ -410,6 +458,25 @@ class TestCall: ) assert "completed" in result.lower() or result # just no error + @pytest.mark.asyncio + async def test_call_async_method_timeout(self): + """Fix 3.2: async call that exceeds timeout should be cancelled.""" + import asyncio + + loop = _make_mock_loop() + loop.consolidator = MagicMock() + + async def _slow(**kwargs): + await asyncio.sleep(60) + + loop.consolidator.slow_method = _slow + tool = _make_tool(loop) + result = await tool.execute( + action="call", + method="consolidator.slow_method", + ) + assert "timed out" in result + @pytest.mark.asyncio async def test_call_blocked_attr_in_path(self): tool = _make_tool() @@ -713,3 +780,279 @@ class TestWatchdog: loop._watchdog_check() assert loop.max_iterations == 50 assert loop.context_window_tokens == 131072 + + +# --------------------------------------------------------------------------- +# SubagentStatus formatting +# --------------------------------------------------------------------------- + +class TestSubagentStatusFormatting: + + def test_format_single_status(self): + """_format_value should produce a rich multi-line display for a SubagentStatus.""" + from nanobot.agent.subagent import SubagentStatus + + status = SubagentStatus( + task_id="abc12345", + label="read logs and summarize", + task_description="Read the log files and produce a summary", + started_at=time.monotonic() - 12.4, + phase="awaiting_tools", + iteration=3, + tool_events=[ + {"name": "read_file", "status": "ok", "detail": "read app.log"}, + {"name": "grep", "status": "ok", "detail": "searched ERROR"}, + {"name": "exec", "status": "error", "detail": "timeout"}, + ], + usage={"prompt_tokens": 4500, "completion_tokens": 1200}, + ) + result = SelfTool._format_value(status) + assert "abc12345" in result + assert "read logs and summarize" in result + assert "awaiting_tools" in result + assert "iteration: 3" in result + assert "read_file(ok)" in result + assert "exec(error)" in result + assert "4500" in result + + def test_format_status_dict(self): + """_format_value should handle dict[str, SubagentStatus] with rich display.""" + from nanobot.agent.subagent import SubagentStatus + + statuses = { + "abc12345": SubagentStatus( + task_id="abc12345", + label="task A", + task_description="Do task A", + started_at=time.monotonic() - 5.0, + phase="awaiting_tools", + iteration=1, + ), + } + result = SelfTool._format_value(statuses) + assert "1 subagent(s)" in result + assert "abc12345" in result + assert "task A" in result + + def test_format_empty_status_dict(self): + """Empty dict[str, SubagentStatus] should show 'no running subagents'.""" + result = SelfTool._format_value({}) + assert "{}" in result + + def test_format_status_with_error(self): + """Status with error should include the error message.""" + from nanobot.agent.subagent import SubagentStatus + + status = SubagentStatus( + task_id="err00001", + label="failing task", + task_description="A task that fails", + started_at=time.monotonic() - 1.0, + phase="error", + error="Connection refused", + ) + result = SelfTool._format_value(status) + assert "error: Connection refused" in result + + def test_format_subagent_manager_with_statuses(self): + """SubagentManager-like object with _task_statuses should show rich display.""" + from nanobot.agent.subagent import SubagentStatus + + mgr = MagicMock() + mgr._running_tasks = {"abc": MagicMock()} + mgr._task_statuses = { + "abc": SubagentStatus( + task_id="abc", + label="work", + task_description="Do work", + started_at=time.monotonic() - 2.0, + phase="tools_completed", + iteration=2, + ), + } + result = SelfTool._format_value(mgr) + assert "abc" in result + assert "work" in result + assert "tools_completed" in result + + def test_format_subagent_manager_fallback_no_statuses(self): + """SubagentManager with empty _task_statuses falls back to simple display.""" + mgr = MagicMock() + mgr._running_tasks = {"abc": MagicMock()} + mgr._task_statuses = {} + result = SelfTool._format_value(mgr) + assert "1 running" in result + + +# --------------------------------------------------------------------------- +# _SubagentHook after_iteration updates status +# --------------------------------------------------------------------------- + +class TestSubagentHookStatus: + + @pytest.mark.asyncio + async def test_after_iteration_updates_status(self): + """after_iteration should copy iteration, tool_events, usage to status.""" + from nanobot.agent.subagent import SubagentStatus, _SubagentHook + from nanobot.agent.hook import AgentHookContext + + status = SubagentStatus( + task_id="test", + label="test", + task_description="test", + started_at=time.monotonic(), + ) + hook = _SubagentHook("test", status) + + context = AgentHookContext( + iteration=5, + messages=[], + tool_events=[{"name": "read_file", "status": "ok", "detail": "ok"}], + usage={"prompt_tokens": 100, "completion_tokens": 50}, + ) + await hook.after_iteration(context) + + assert status.iteration == 5 + assert len(status.tool_events) == 1 + assert status.tool_events[0]["name"] == "read_file" + assert status.usage == {"prompt_tokens": 100, "completion_tokens": 50} + + @pytest.mark.asyncio + async def test_after_iteration_with_error(self): + """after_iteration should set status.error when context has an error.""" + from nanobot.agent.subagent import SubagentStatus, _SubagentHook + from nanobot.agent.hook import AgentHookContext + + status = SubagentStatus( + task_id="test", + label="test", + task_description="test", + started_at=time.monotonic(), + ) + hook = _SubagentHook("test", status) + + context = AgentHookContext( + iteration=1, + messages=[], + error="something went wrong", + ) + await hook.after_iteration(context) + + assert status.error == "something went wrong" + + @pytest.mark.asyncio + async def test_after_iteration_no_status_is_noop(self): + """after_iteration with no status should be a no-op.""" + from nanobot.agent.subagent import _SubagentHook + from nanobot.agent.hook import AgentHookContext + + hook = _SubagentHook("test") + context = AgentHookContext(iteration=1, messages=[]) + await hook.after_iteration(context) # should not raise + + +# --------------------------------------------------------------------------- +# Checkpoint callback updates status +# --------------------------------------------------------------------------- + +class TestCheckpointCallback: + + @pytest.mark.asyncio + async def test_checkpoint_updates_phase_and_iteration(self): + """The _on_checkpoint callback should update status.phase and iteration.""" + from nanobot.agent.subagent import SubagentStatus + import asyncio + + status = SubagentStatus( + task_id="cp", + label="test", + task_description="test", + started_at=time.monotonic(), + ) + + # Simulate the checkpoint callback as defined in _run_subagent + async def _on_checkpoint(payload: dict) -> None: + status.phase = payload.get("phase", status.phase) + status.iteration = payload.get("iteration", status.iteration) + + await _on_checkpoint({"phase": "awaiting_tools", "iteration": 2}) + assert status.phase == "awaiting_tools" + assert status.iteration == 2 + + await _on_checkpoint({"phase": "tools_completed", "iteration": 3}) + assert status.phase == "tools_completed" + assert status.iteration == 3 + + @pytest.mark.asyncio + async def test_checkpoint_preserves_phase_on_missing_key(self): + """If payload doesn't have 'phase', status.phase should stay unchanged.""" + from nanobot.agent.subagent import SubagentStatus + + status = SubagentStatus( + task_id="cp", + label="test", + task_description="test", + started_at=time.monotonic(), + phase="initializing", + ) + + async def _on_checkpoint(payload: dict) -> None: + status.phase = payload.get("phase", status.phase) + status.iteration = payload.get("iteration", status.iteration) + + await _on_checkpoint({"iteration": 1}) + assert status.phase == "initializing" + assert status.iteration == 1 + + +# --------------------------------------------------------------------------- +# inspect subagents._task_statuses via dot-path +# --------------------------------------------------------------------------- + +class TestInspectTaskStatuses: + + @pytest.mark.asyncio + async def test_inspect_task_statuses_dotpath(self): + """self inspect subagents._task_statuses should show rich status.""" + from nanobot.agent.subagent import SubagentStatus + + loop = _make_mock_loop() + loop.subagents._task_statuses = { + "abc12345": SubagentStatus( + task_id="abc12345", + label="read logs", + task_description="Read the log files", + started_at=time.monotonic() - 8.0, + phase="awaiting_tools", + iteration=2, + tool_events=[{"name": "read_file", "status": "ok", "detail": "ok"}], + usage={"prompt_tokens": 500, "completion_tokens": 100}, + ), + } + tool = _make_tool(loop) + result = await tool.execute(action="inspect", key="subagents._task_statuses") + assert "abc12345" in result + assert "read logs" in result + assert "awaiting_tools" in result + + @pytest.mark.asyncio + async def test_inspect_single_subagent_status(self): + """self inspect should format a single SubagentStatus object.""" + from nanobot.agent.subagent import SubagentStatus + + loop = _make_mock_loop() + status = SubagentStatus( + task_id="xyz", + label="search code", + task_description="Search the codebase", + started_at=time.monotonic() - 3.0, + phase="done", + iteration=4, + stop_reason="completed", + ) + loop.subagents._task_statuses = {"xyz": status} + tool = _make_tool(loop) + result = await tool.execute(action="inspect", key="subagents._task_statuses.xyz") + assert "xyz" in result + assert "search code" in result + assert "stop_reason: completed" in result