diff --git a/docs/configuration.md b/docs/configuration.md
index ec889c758..d0a7fe940 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -1009,6 +1009,28 @@ MCP tools are automatically discovered and registered on startup. The LLM can us
 **Docker security**: The official Docker image runs as a non-root user (`nanobot`, UID 1000) with bubblewrap pre-installed. When using `docker-compose.yml`, the container drops all Linux capabilities except `SYS_ADMIN` (required for bwrap's namespace isolation).
 
 
+## Subagent Concurrency
+
+By default, nanobot only allows one spawned subagent at a time. When the limit is
+reached, the `spawn` tool returns an error so the agent can decide to wait or
+rearrange its work. This protects local LLM servers from loading multiple KV caches
+at once. If your provider can handle more parallel work, raise the limit:
+
+```json
+{
+  "agents": {
+    "defaults": {
+      "maxConcurrentSubagents": 2
+    }
+  }
+}
+```
+
+| Option | Default | Description |
+|--------|---------|-------------|
+| `agents.defaults.maxConcurrentSubagents` | `1` | Maximum number of spawned subagents that may run at the same time. Attempts to spawn beyond this limit return an error. |
+
+
 ## Auto Compact
 
 When a user is idle for longer than a configured threshold, nanobot **proactively** compresses the older part of the session context into a summary while keeping a recent legal suffix of live messages. This reduces token cost and first-token latency when the user returns — instead of re-processing a long stale context with an expired KV cache, the model receives a compact summary, the most recent live context, and fresh input.
diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py
index 18f0bd53b..6d64698a7 100644
--- a/nanobot/agent/subagent.py
+++ b/nanobot/agent/subagent.py
@@ -83,6 +83,7 @@ class SubagentManager:
         disabled_skills: list[str] | None = None,
         max_iterations: int | None = None,
     ):
+        defaults = AgentDefaults()
         self.provider = provider
         self.workspace = workspace
         self.bus = bus
@@ -95,8 +96,9 @@ class SubagentManager:
         self.max_iterations = (
             max_iterations
             if max_iterations is not None
-            else AgentDefaults().max_tool_iterations
+            else defaults.max_tool_iterations
         )
+        self.max_concurrent_subagents = defaults.max_concurrent_subagents
         self.runner = AgentRunner(provider)
         self._running_tasks: dict[str, asyncio.Task[None]] = {}
         self._task_statuses: dict[str, SubagentStatus] = {}
diff --git a/nanobot/agent/tools/spawn.py b/nanobot/agent/tools/spawn.py
index a1acf0aae..17ad48d12 100644
--- a/nanobot/agent/tools/spawn.py
+++ b/nanobot/agent/tools/spawn.py
@@ -56,6 +56,14 @@ class SpawnTool(Tool):
 
     async def execute(self, task: str, label: str | None = None, **kwargs: Any) -> str:
         """Spawn a subagent to execute the given task."""
+        running = self._manager.get_running_count()
+        limit = self._manager.max_concurrent_subagents
+        if running >= limit:
+            return (
+                f"Cannot spawn subagent: concurrency limit reached "
+                f"({running}/{limit} running). Wait for a running subagent "
+                f"to complete before spawning a new one."
+            )
         return await self._manager.spawn(
             task=task,
             label=label,
diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py
index be4eb7202..2f20eb99e 100644
--- a/nanobot/config/schema.py
+++ b/nanobot/config/schema.py
@@ -78,6 +78,7 @@ class AgentDefaults(Base):
     context_block_limit: int | None = None
     temperature: float = 0.1
     max_tool_iterations: int = 200
+    max_concurrent_subagents: int = Field(default=1, ge=1)
     max_tool_result_chars: int = 16_000
     provider_retry_mode: Literal["standard", "persistent"] = "standard"
     reasoning_effort: str | None = None  # low / medium / high / adaptive - enables LLM thinking mode
diff --git a/tests/agent/tools/test_subagent_tools.py b/tests/agent/tools/test_subagent_tools.py
index a050a4271..f43f98f24 100644
--- a/tests/agent/tools/test_subagent_tools.py
+++ b/tests/agent/tools/test_subagent_tools.py
@@ -93,6 +93,75 @@ async def test_subagent_uses_configured_max_iterations(tmp_path):
     mgr.runner.run.assert_awaited_once()
 
 
+@pytest.mark.asyncio
+async def test_spawn_tool_rejects_when_at_concurrency_limit(tmp_path):
+    """SpawnTool should return an error string when the concurrency limit is reached."""
+    from nanobot.agent.subagent import SubagentManager
+    from nanobot.agent.tools.spawn import SpawnTool
+    from nanobot.bus.queue import MessageBus
+
+    bus = MessageBus()
+    provider = MagicMock()
+    provider.get_default_model.return_value = "test-model"
+    mgr = SubagentManager(
+        provider=provider,
+        workspace=tmp_path,
+        bus=bus,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    )
+    mgr._announce_result = AsyncMock()
+
+    # Block the first subagent so it stays "running"
+    release = asyncio.Event()
+
+    async def fake_run(spec):
+        await release.wait()
+        return SimpleNamespace(
+            stop_reason="done",
+            final_content="done",
+            error=None,
+            tool_events=[],
+        )
+
+    mgr.runner.run = AsyncMock(side_effect=fake_run)
+
+    tool = SpawnTool(mgr)
+    tool.set_context("test", "c1", "test:c1")
+
+    # First spawn succeeds
+    result = await tool.execute(task="first task")
+    assert "started" in result
+
+    # Second spawn should be rejected (default limit is 1)
+    result = await tool.execute(task="second task")
+    assert "Cannot spawn subagent" in result
+    assert "concurrency limit reached" in result
+
+    # Release the first subagent
+    release.set()
+    # Allow cleanup
+    await asyncio.gather(*mgr._running_tasks.values(), return_exceptions=True)
+
+
+def test_subagent_default_max_concurrent_matches_agent_defaults(tmp_path):
+    """Direct SubagentManager construction should use the agent default concurrency limit."""
+    from nanobot.agent.subagent import SubagentManager
+    from nanobot.bus.queue import MessageBus
+
+    bus = MessageBus()
+    provider = MagicMock()
+    provider.get_default_model.return_value = "test-model"
+
+    mgr = SubagentManager(
+        provider=provider,
+        workspace=tmp_path,
+        bus=bus,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+    )
+
+    assert mgr.max_concurrent_subagents == AgentDefaults().max_concurrent_subagents
+
+
 def test_subagent_default_max_iterations_matches_agent_defaults(tmp_path):
     """Direct SubagentManager construction should use the agent default limit."""
     from nanobot.agent.subagent import SubagentManager
diff --git a/tests/test_tool_contextvars.py b/tests/test_tool_contextvars.py
index a1e7bd8c0..3763ba980 100644
--- a/tests/test_tool_contextvars.py
+++ b/tests/test_tool_contextvars.py
@@ -49,6 +49,11 @@ async def test_spawn_tool_keeps_task_local_context() -> None:
     release = asyncio.Event()
 
     class _Manager:
+        max_concurrent_subagents = 1
+
+        def get_running_count(self) -> int:
+            return 0
+
         async def spawn(
             self,
             *,
@@ -156,6 +161,11 @@ async def test_spawn_tool_basic_set_context_and_execute() -> None:
     seen: list[tuple[str, str, str]] = []
 
     class _Manager:
+        max_concurrent_subagents = 1
+
+        def get_running_count(self) -> int:
+            return 0
+
         async def spawn(
             self,
             *,
@@ -183,6 +193,11 @@ async def test_spawn_tool_default_values_without_set_context() -> None:
     seen: list[tuple[str, str, str]] = []
 
     class _Manager:
+        max_concurrent_subagents = 1
+
+        def get_running_count(self) -> int:
+            return 0
+
         async def spawn(
             self,
             *,