diff --git a/docs/configuration.md b/docs/configuration.md index ec889c758..d0a7fe940 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1009,6 +1009,28 @@ MCP tools are automatically discovered and registered on startup. The LLM can us **Docker security**: The official Docker image runs as a non-root user (`nanobot`, UID 1000) with bubblewrap pre-installed. When using `docker-compose.yml`, the container drops all Linux capabilities except `SYS_ADMIN` (required for bwrap's namespace isolation). +## Subagent Concurrency + +By default, nanobot only allows one spawned subagent at a time. When the limit is +reached, the `spawn` tool returns an error so the agent can decide to wait or +rearrange its work. This protects local LLM servers from loading multiple KV caches +at once. If your provider can handle more parallel work, raise the limit: + +```json +{ + "agents": { + "defaults": { + "maxConcurrentSubagents": 2 + } + } +} +``` + +| Option | Default | Description | +|--------|---------|-------------| +| `agents.defaults.maxConcurrentSubagents` | `1` | Maximum number of spawned subagents that may run at the same time. Attempts to spawn beyond this limit return an error. | + + ## Auto Compact When a user is idle for longer than a configured threshold, nanobot **proactively** compresses the older part of the session context into a summary while keeping a recent legal suffix of live messages. This reduces token cost and first-token latency when the user returns — instead of re-processing a long stale context with an expired KV cache, the model receives a compact summary, the most recent live context, and fresh input. diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py index 18f0bd53b..6d64698a7 100644 --- a/nanobot/agent/subagent.py +++ b/nanobot/agent/subagent.py @@ -83,6 +83,7 @@ class SubagentManager: disabled_skills: list[str] | None = None, max_iterations: int | None = None, ): + defaults = AgentDefaults() self.provider = provider self.workspace = workspace self.bus = bus @@ -95,8 +96,9 @@ class SubagentManager: self.max_iterations = ( max_iterations if max_iterations is not None - else AgentDefaults().max_tool_iterations + else defaults.max_tool_iterations ) + self.max_concurrent_subagents = defaults.max_concurrent_subagents self.runner = AgentRunner(provider) self._running_tasks: dict[str, asyncio.Task[None]] = {} self._task_statuses: dict[str, SubagentStatus] = {} diff --git a/nanobot/agent/tools/spawn.py b/nanobot/agent/tools/spawn.py index a1acf0aae..17ad48d12 100644 --- a/nanobot/agent/tools/spawn.py +++ b/nanobot/agent/tools/spawn.py @@ -56,6 +56,14 @@ class SpawnTool(Tool): async def execute(self, task: str, label: str | None = None, **kwargs: Any) -> str: """Spawn a subagent to execute the given task.""" + running = self._manager.get_running_count() + limit = self._manager.max_concurrent_subagents + if running >= limit: + return ( + f"Cannot spawn subagent: concurrency limit reached " + f"({running}/{limit} running). Wait for a running subagent " + f"to complete before spawning a new one." + ) return await self._manager.spawn( task=task, label=label, diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index be4eb7202..2f20eb99e 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -78,6 +78,7 @@ class AgentDefaults(Base): context_block_limit: int | None = None temperature: float = 0.1 max_tool_iterations: int = 200 + max_concurrent_subagents: int = Field(default=1, ge=1) max_tool_result_chars: int = 16_000 provider_retry_mode: Literal["standard", "persistent"] = "standard" reasoning_effort: str | None = None # low / medium / high / adaptive - enables LLM thinking mode diff --git a/tests/agent/tools/test_subagent_tools.py b/tests/agent/tools/test_subagent_tools.py index a050a4271..f43f98f24 100644 --- a/tests/agent/tools/test_subagent_tools.py +++ b/tests/agent/tools/test_subagent_tools.py @@ -93,6 +93,75 @@ async def test_subagent_uses_configured_max_iterations(tmp_path): mgr.runner.run.assert_awaited_once() +@pytest.mark.asyncio +async def test_spawn_tool_rejects_when_at_concurrency_limit(tmp_path): + """SpawnTool should return an error string when the concurrency limit is reached.""" + from nanobot.agent.subagent import SubagentManager + from nanobot.agent.tools.spawn import SpawnTool + from nanobot.bus.queue import MessageBus + + bus = MessageBus() + provider = MagicMock() + provider.get_default_model.return_value = "test-model" + mgr = SubagentManager( + provider=provider, + workspace=tmp_path, + bus=bus, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + ) + mgr._announce_result = AsyncMock() + + # Block the first subagent so it stays "running" + release = asyncio.Event() + + async def fake_run(spec): + await release.wait() + return SimpleNamespace( + stop_reason="done", + final_content="done", + error=None, + tool_events=[], + ) + + mgr.runner.run = AsyncMock(side_effect=fake_run) + + tool = SpawnTool(mgr) + tool.set_context("test", "c1", "test:c1") + + # First spawn succeeds + result = await tool.execute(task="first task") + assert "started" in result + + # Second spawn should be rejected (default limit is 1) + result = await tool.execute(task="second task") + assert "Cannot spawn subagent" in result + assert "concurrency limit reached" in result + + # Release the first subagent + release.set() + # Allow cleanup + await asyncio.gather(*mgr._running_tasks.values(), return_exceptions=True) + + +def test_subagent_default_max_concurrent_matches_agent_defaults(tmp_path): + """Direct SubagentManager construction should use the agent default concurrency limit.""" + from nanobot.agent.subagent import SubagentManager + from nanobot.bus.queue import MessageBus + + bus = MessageBus() + provider = MagicMock() + provider.get_default_model.return_value = "test-model" + + mgr = SubagentManager( + provider=provider, + workspace=tmp_path, + bus=bus, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + ) + + assert mgr.max_concurrent_subagents == AgentDefaults().max_concurrent_subagents + + def test_subagent_default_max_iterations_matches_agent_defaults(tmp_path): """Direct SubagentManager construction should use the agent default limit.""" from nanobot.agent.subagent import SubagentManager diff --git a/tests/test_tool_contextvars.py b/tests/test_tool_contextvars.py index a1e7bd8c0..3763ba980 100644 --- a/tests/test_tool_contextvars.py +++ b/tests/test_tool_contextvars.py @@ -49,6 +49,11 @@ async def test_spawn_tool_keeps_task_local_context() -> None: release = asyncio.Event() class _Manager: + max_concurrent_subagents = 1 + + def get_running_count(self) -> int: + return 0 + async def spawn( self, *, @@ -156,6 +161,11 @@ async def test_spawn_tool_basic_set_context_and_execute() -> None: seen: list[tuple[str, str, str]] = [] class _Manager: + max_concurrent_subagents = 1 + + def get_running_count(self) -> int: + return 0 + async def spawn( self, *, @@ -183,6 +193,11 @@ async def test_spawn_tool_default_values_without_set_context() -> None: seen: list[tuple[str, str, str]] = [] class _Manager: + max_concurrent_subagents = 1 + + def get_running_count(self) -> int: + return 0 + async def spawn( self, *,