refactor(agent): simplify subagent concurrency with rejection over semaphore

Replace the asyncio.Semaphore queueing approach with a simple count
check in SpawnTool.execute(). When the concurrency limit is reached,
the tool returns an error string so the agent can perceive the reason
and adjust its behavior instead of silently queueing.

- Remove max_concurrent_subagents parameter threading through
  AgentLoop, commands.py, and nanobot.py
- SubagentManager reads the limit directly from AgentDefaults
- SpawnTool checks get_running_count() before calling spawn()
- Simplify tests to verify rejection behavior
This commit is contained in:
chengyongru 2026-05-05 21:11:27 +08:00 committed by chengyongru
parent 2103cd5602
commit 3baa869fdb
8 changed files with 49 additions and 86 deletions

View File

@ -1011,9 +1011,10 @@ MCP tools are automatically discovered and registered on startup. The LLM can us
## Subagent Concurrency ## Subagent Concurrency
By default, nanobot runs one spawned subagent at a time. This protects local LLM By default, nanobot only allows one spawned subagent at a time. When the limit is
servers from loading multiple KV caches at once. If your provider can handle more reached, the `spawn` tool returns an error so the agent can decide to wait or
parallel work, raise the limit: rearrange its work. This protects local LLM servers from loading multiple KV caches
at once. If your provider can handle more parallel work, raise the limit:
```json ```json
{ {
@ -1027,7 +1028,7 @@ parallel work, raise the limit:
| Option | Default | Description | | Option | Default | Description |
|--------|---------|-------------| |--------|---------|-------------|
| `agents.defaults.maxConcurrentSubagents` | `1` | Maximum number of spawned subagents that may execute at the same time. Extra subagents stay queued until a slot is free. | | `agents.defaults.maxConcurrentSubagents` | `1` | Maximum number of spawned subagents that may run at the same time. Attempts to spawn beyond this limit return an error. |
## Auto Compact ## Auto Compact

View File

@ -189,7 +189,6 @@ class AgentLoop:
workspace: Path, workspace: Path,
model: str | None = None, model: str | None = None,
max_iterations: int | None = None, max_iterations: int | None = None,
max_concurrent_subagents: int | None = None,
context_window_tokens: int | None = None, context_window_tokens: int | None = None,
context_block_limit: int | None = None, context_block_limit: int | None = None,
max_tool_result_chars: int | None = None, max_tool_result_chars: int | None = None,
@ -226,11 +225,6 @@ class AgentLoop:
self.max_iterations = ( self.max_iterations = (
max_iterations if max_iterations is not None else defaults.max_tool_iterations max_iterations if max_iterations is not None else defaults.max_tool_iterations
) )
self.max_concurrent_subagents = (
max_concurrent_subagents
if max_concurrent_subagents is not None
else defaults.max_concurrent_subagents
)
self.context_window_tokens = ( self.context_window_tokens = (
context_window_tokens context_window_tokens
if context_window_tokens is not None if context_window_tokens is not None
@ -269,7 +263,6 @@ class AgentLoop:
restrict_to_workspace=restrict_to_workspace, restrict_to_workspace=restrict_to_workspace,
disabled_skills=disabled_skills, disabled_skills=disabled_skills,
max_iterations=self.max_iterations, max_iterations=self.max_iterations,
max_concurrent_subagents=self.max_concurrent_subagents,
) )
self._unified_session = unified_session self._unified_session = unified_session
self._max_messages = max_messages if max_messages > 0 else 120 self._max_messages = max_messages if max_messages > 0 else 120

View File

@ -82,7 +82,6 @@ class SubagentManager:
restrict_to_workspace: bool = False, restrict_to_workspace: bool = False,
disabled_skills: list[str] | None = None, disabled_skills: list[str] | None = None,
max_iterations: int | None = None, max_iterations: int | None = None,
max_concurrent_subagents: int | None = None,
): ):
defaults = AgentDefaults() defaults = AgentDefaults()
self.provider = provider self.provider = provider
@ -99,12 +98,7 @@ class SubagentManager:
if max_iterations is not None if max_iterations is not None
else defaults.max_tool_iterations else defaults.max_tool_iterations
) )
self.max_concurrent_subagents = ( self.max_concurrent_subagents = defaults.max_concurrent_subagents
max_concurrent_subagents
if max_concurrent_subagents is not None
else defaults.max_concurrent_subagents
)
self._concurrency_gate = asyncio.Semaphore(self.max_concurrent_subagents)
self.runner = AgentRunner(provider) self.runner = AgentRunner(provider)
self._running_tasks: dict[str, asyncio.Task[None]] = {} self._running_tasks: dict[str, asyncio.Task[None]] = {}
self._task_statuses: dict[str, SubagentStatus] = {} self._task_statuses: dict[str, SubagentStatus] = {}
@ -138,9 +132,7 @@ class SubagentManager:
self._task_statuses[task_id] = status self._task_statuses[task_id] = status
bg_task = asyncio.create_task( bg_task = asyncio.create_task(
self._run_subagent_limited( self._run_subagent(task_id, task, display_label, origin, status, origin_message_id)
task_id, task, display_label, origin, status, origin_message_id
)
) )
self._running_tasks[task_id] = bg_task self._running_tasks[task_id] = bg_task
if session_key: if session_key:
@ -159,19 +151,6 @@ class SubagentManager:
logger.info("Spawned subagent [{}]: {}", task_id, display_label) logger.info("Spawned subagent [{}]: {}", task_id, display_label)
return f"Subagent [{display_label}] started (id: {task_id}). I'll notify you when it completes." return f"Subagent [{display_label}] started (id: {task_id}). I'll notify you when it completes."
async def _run_subagent_limited(
self,
task_id: str,
task: str,
label: str,
origin: dict[str, str],
status: SubagentStatus,
origin_message_id: str | None = None,
) -> None:
"""Wait for an execution slot, then run the subagent."""
async with self._concurrency_gate:
await self._run_subagent(task_id, task, label, origin, status, origin_message_id)
async def _run_subagent( async def _run_subagent(
self, self,
task_id: str, task_id: str,

View File

@ -56,6 +56,14 @@ class SpawnTool(Tool):
async def execute(self, task: str, label: str | None = None, **kwargs: Any) -> str: async def execute(self, task: str, label: str | None = None, **kwargs: Any) -> str:
"""Spawn a subagent to execute the given task.""" """Spawn a subagent to execute the given task."""
running = self._manager.get_running_count()
limit = self._manager.max_concurrent_subagents
if running >= limit:
return (
f"Cannot spawn subagent: concurrency limit reached "
f"({running}/{limit} running). Wait for a running subagent "
f"to complete before spawning a new one."
)
return await self._manager.spawn( return await self._manager.spawn(
task=task, task=task,
label=label, label=label,

View File

@ -514,7 +514,6 @@ def serve(
workspace=runtime_config.workspace_path, workspace=runtime_config.workspace_path,
model=runtime_config.agents.defaults.model, model=runtime_config.agents.defaults.model,
max_iterations=runtime_config.agents.defaults.max_tool_iterations, max_iterations=runtime_config.agents.defaults.max_tool_iterations,
max_concurrent_subagents=runtime_config.agents.defaults.max_concurrent_subagents,
context_window_tokens=runtime_config.agents.defaults.context_window_tokens, context_window_tokens=runtime_config.agents.defaults.context_window_tokens,
context_block_limit=runtime_config.agents.defaults.context_block_limit, context_block_limit=runtime_config.agents.defaults.context_block_limit,
max_tool_result_chars=runtime_config.agents.defaults.max_tool_result_chars, max_tool_result_chars=runtime_config.agents.defaults.max_tool_result_chars,
@ -628,7 +627,6 @@ def _run_gateway(
workspace=config.workspace_path, workspace=config.workspace_path,
model=provider_snapshot.model, model=provider_snapshot.model,
max_iterations=config.agents.defaults.max_tool_iterations, max_iterations=config.agents.defaults.max_tool_iterations,
max_concurrent_subagents=config.agents.defaults.max_concurrent_subagents,
context_window_tokens=provider_snapshot.context_window_tokens, context_window_tokens=provider_snapshot.context_window_tokens,
web_config=config.tools.web, web_config=config.tools.web,
context_block_limit=config.agents.defaults.context_block_limit, context_block_limit=config.agents.defaults.context_block_limit,
@ -1021,7 +1019,6 @@ def agent(
workspace=config.workspace_path, workspace=config.workspace_path,
model=config.agents.defaults.model, model=config.agents.defaults.model,
max_iterations=config.agents.defaults.max_tool_iterations, max_iterations=config.agents.defaults.max_tool_iterations,
max_concurrent_subagents=config.agents.defaults.max_concurrent_subagents,
context_window_tokens=config.agents.defaults.context_window_tokens, context_window_tokens=config.agents.defaults.context_window_tokens,
web_config=config.tools.web, web_config=config.tools.web,
context_block_limit=config.agents.defaults.context_block_limit, context_block_limit=config.agents.defaults.context_block_limit,

View File

@ -72,7 +72,6 @@ class Nanobot:
workspace=config.workspace_path, workspace=config.workspace_path,
model=defaults.model, model=defaults.model,
max_iterations=defaults.max_tool_iterations, max_iterations=defaults.max_tool_iterations,
max_concurrent_subagents=defaults.max_concurrent_subagents,
context_window_tokens=defaults.context_window_tokens, context_window_tokens=defaults.context_window_tokens,
context_block_limit=defaults.context_block_limit, context_block_limit=defaults.context_block_limit,
max_tool_result_chars=defaults.max_tool_result_chars, max_tool_result_chars=defaults.max_tool_result_chars,

View File

@ -94,9 +94,10 @@ async def test_subagent_uses_configured_max_iterations(tmp_path):
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_subagent_manager_limits_concurrent_spawn_runs(tmp_path): async def test_spawn_tool_rejects_when_at_concurrency_limit(tmp_path):
"""Spawned subagents should not execute more than the configured limit at once.""" """SpawnTool should return an error string when the concurrency limit is reached."""
from nanobot.agent.subagent import SubagentManager from nanobot.agent.subagent import SubagentManager
from nanobot.agent.tools.spawn import SpawnTool
from nanobot.bus.queue import MessageBus from nanobot.bus.queue import MessageBus
bus = MessageBus() bus = MessageBus()
@ -107,25 +108,14 @@ async def test_subagent_manager_limits_concurrent_spawn_runs(tmp_path):
workspace=tmp_path, workspace=tmp_path,
bus=bus, bus=bus,
max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
max_concurrent_subagents=1,
) )
mgr._announce_result = AsyncMock() mgr._announce_result = AsyncMock()
active = 0 # Block the first subagent so it stays "running"
max_active = 0 release = asyncio.Event()
started_count = 0
starts: asyncio.Queue[int] = asyncio.Queue()
releases = [asyncio.Event(), asyncio.Event()]
async def fake_run(spec): async def fake_run(spec):
nonlocal active, max_active, started_count await release.wait()
index = started_count
started_count += 1
active += 1
max_active = max(max_active, active)
await starts.put(index)
await releases[index].wait()
active -= 1
return SimpleNamespace( return SimpleNamespace(
stop_reason="done", stop_reason="done",
final_content="done", final_content="done",
@ -135,21 +125,22 @@ async def test_subagent_manager_limits_concurrent_spawn_runs(tmp_path):
mgr.runner.run = AsyncMock(side_effect=fake_run) mgr.runner.run = AsyncMock(side_effect=fake_run)
await mgr.spawn("first task", session_key="test:session") tool = SpawnTool(mgr)
await mgr.spawn("second task", session_key="test:session") tool.set_context("test", "c1", "test:c1")
tasks = list(mgr._running_tasks.values())
first_index = await asyncio.wait_for(starts.get(), timeout=0.5) # First spawn succeeds
with pytest.raises(asyncio.TimeoutError): result = await tool.execute(task="first task")
await asyncio.wait_for(starts.get(), timeout=0.05) assert "started" in result
releases[first_index].set() # Second spawn should be rejected (default limit is 1)
second_index = await asyncio.wait_for(starts.get(), timeout=0.5) result = await tool.execute(task="second task")
releases[second_index].set() assert "Cannot spawn subagent" in result
await asyncio.gather(*tasks) assert "concurrency limit reached" in result
assert max_active == 1 # Release the first subagent
assert mgr.runner.run.await_count == 2 release.set()
# Allow cleanup
await asyncio.gather(*mgr._running_tasks.values(), return_exceptions=True)
def test_subagent_default_max_concurrent_matches_agent_defaults(tmp_path): def test_subagent_default_max_concurrent_matches_agent_defaults(tmp_path):
@ -210,26 +201,6 @@ def test_agent_loop_passes_max_iterations_to_subagents(tmp_path):
assert loop.subagents.max_iterations == 42 assert loop.subagents.max_iterations == 42
def test_agent_loop_passes_max_concurrent_subagents_to_subagents(tmp_path):
"""AgentLoop's configured subagent concurrency limit should be shared with subagents."""
from nanobot.agent.loop import AgentLoop
from nanobot.bus.queue import MessageBus
bus = MessageBus()
provider = MagicMock()
provider.get_default_model.return_value = "test-model"
loop = AgentLoop(
bus=bus,
provider=provider,
workspace=tmp_path,
model="test-model",
max_concurrent_subagents=2,
)
assert loop.subagents.max_concurrent_subagents == 2
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_agent_loop_syncs_updated_max_iterations_before_run(tmp_path): async def test_agent_loop_syncs_updated_max_iterations_before_run(tmp_path):
"""Runtime max_iterations changes should be reflected before tool execution.""" """Runtime max_iterations changes should be reflected before tool execution."""

View File

@ -49,6 +49,11 @@ async def test_spawn_tool_keeps_task_local_context() -> None:
release = asyncio.Event() release = asyncio.Event()
class _Manager: class _Manager:
max_concurrent_subagents = 1
def get_running_count(self) -> int:
return 0
async def spawn( async def spawn(
self, self,
*, *,
@ -156,6 +161,11 @@ async def test_spawn_tool_basic_set_context_and_execute() -> None:
seen: list[tuple[str, str, str]] = [] seen: list[tuple[str, str, str]] = []
class _Manager: class _Manager:
max_concurrent_subagents = 1
def get_running_count(self) -> int:
return 0
async def spawn( async def spawn(
self, self,
*, *,
@ -183,6 +193,11 @@ async def test_spawn_tool_default_values_without_set_context() -> None:
seen: list[tuple[str, str, str]] = [] seen: list[tuple[str, str, str]] = []
class _Manager: class _Manager:
max_concurrent_subagents = 1
def get_running_count(self) -> int:
return 0
async def spawn( async def spawn(
self, self,
*, *,