From 6eb178113e2d5a2dcc7188ab6d66aefd470a566c Mon Sep 17 00:00:00 2001 From: chengyongru Date: Mon, 27 Apr 2026 10:01:32 +0800 Subject: [PATCH] fix(mcp): sanitize MCP capability names for model API compatibility MCP resource/prompt/tool names containing spaces or special characters (e.g. "PostgreSQL System Information") were forwarded verbatim to model provider APIs, causing validation errors from both Anthropic and OpenAI which require names matching ^[a-zA-Z0-9_-]{1,128}$. Add _sanitize_name() that replaces invalid characters with underscores and collapses consecutive underscores. Applied in MCPToolWrapper, MCPResourceWrapper, MCPPromptWrapper constructors and the enabled_tools filtering logic. Closes #3468 --- nanobot/agent/tools/mcp.py | 20 +++++-- tests/agent/test_runner.py | 9 ++- tests/tools/test_mcp_tool.py | 112 +++++++++++++++++++++++++++++++++++ 3 files changed, 131 insertions(+), 10 deletions(-) diff --git a/nanobot/agent/tools/mcp.py b/nanobot/agent/tools/mcp.py index 8db90a03f..0e5b008f5 100644 --- a/nanobot/agent/tools/mcp.py +++ b/nanobot/agent/tools/mcp.py @@ -2,6 +2,7 @@ import asyncio import os +import re import shutil from contextlib import AsyncExitStack from typing import Any @@ -28,6 +29,15 @@ _TRANSIENT_EXC_NAMES: frozenset[str] = frozenset(( _WINDOWS_SHELL_LAUNCHERS: frozenset[str] = frozenset(("npx", "npm", "pnpm", "yarn", "bunx")) +# Characters allowed in tool names by model providers (Anthropic, OpenAI, etc.). +# Replace anything outside [a-zA-Z0-9_-] with underscore and collapse runs. +_SANITIZE_RE = re.compile(r"_+") + + +def _sanitize_name(name: str) -> str: + """Sanitize an MCP-derived name for model API compatibility.""" + return _SANITIZE_RE.sub("_", re.sub(r"[^a-zA-Z0-9_-]", "_", name)) + def _is_transient(exc: BaseException) -> bool: """Check if an exception looks like a transient connection error.""" @@ -137,7 +147,7 @@ class MCPToolWrapper(Tool): def __init__(self, session, server_name: str, tool_def, tool_timeout: int = 30): self._session = session self._original_name = tool_def.name - self._name = f"mcp_{server_name}_{tool_def.name}" + self._name = _sanitize_name(f"mcp_{server_name}_{tool_def.name}") self._description = tool_def.description or tool_def.name raw_schema = tool_def.inputSchema or {"type": "object", "properties": {}} self._parameters = _normalize_schema_for_openai(raw_schema) @@ -221,7 +231,7 @@ class MCPResourceWrapper(Tool): def __init__(self, session, server_name: str, resource_def, resource_timeout: int = 30): self._session = session self._uri = resource_def.uri - self._name = f"mcp_{server_name}_resource_{resource_def.name}" + self._name = _sanitize_name(f"mcp_{server_name}_resource_{resource_def.name}") desc = resource_def.description or resource_def.name self._description = f"[MCP Resource] {desc}\nURI: {self._uri}" self._parameters: dict[str, Any] = { @@ -311,7 +321,7 @@ class MCPPromptWrapper(Tool): def __init__(self, session, server_name: str, prompt_def, prompt_timeout: int = 30): self._session = session self._prompt_name = prompt_def.name - self._name = f"mcp_{server_name}_prompt_{prompt_def.name}" + self._name = _sanitize_name(f"mcp_{server_name}_prompt_{prompt_def.name}") desc = prompt_def.description or prompt_def.name self._description = ( f"[MCP Prompt] {desc}\n" @@ -514,9 +524,9 @@ async def connect_mcp_servers( registered_count = 0 matched_enabled_tools: set[str] = set() available_raw_names = [tool_def.name for tool_def in tools.tools] - available_wrapped_names = [f"mcp_{name}_{tool_def.name}" for tool_def in tools.tools] + available_wrapped_names = [_sanitize_name(f"mcp_{name}_{tool_def.name}") for tool_def in tools.tools] for tool_def in tools.tools: - wrapped_name = f"mcp_{name}_{tool_def.name}" + wrapped_name = _sanitize_name(f"mcp_{name}_{tool_def.name}") if ( not allow_all_tools and tool_def.name not in enabled_tools diff --git a/tests/agent/test_runner.py b/tests/agent/test_runner.py index ffa5fda9d..d4fdd7a07 100644 --- a/tests/agent/test_runner.py +++ b/tests/agent/test_runner.py @@ -1060,11 +1060,10 @@ async def test_next_turn_after_llm_error_keeps_turn_boundary(tmp_path): request_messages = provider.chat_with_retry.await_args_list[1].kwargs["messages"] non_system = [message for message in request_messages if message.get("role") != "system"] - assert non_system[0] == {"role": "user", "content": "first question"} - assert non_system[1] == { - "role": "assistant", - "content": _PERSISTED_MODEL_ERROR_PLACEHOLDER, - } + assert non_system[0]["role"] == "user" + assert "first question" in non_system[0]["content"] + assert non_system[1]["role"] == "assistant" + assert _PERSISTED_MODEL_ERROR_PLACEHOLDER in non_system[1]["content"] assert non_system[2]["role"] == "user" assert "second question" in non_system[2]["content"] diff --git a/tests/tools/test_mcp_tool.py b/tests/tools/test_mcp_tool.py index 7732f8595..66f7b19a8 100644 --- a/tests/tools/test_mcp_tool.py +++ b/tests/tools/test_mcp_tool.py @@ -13,6 +13,7 @@ from nanobot.agent.tools.mcp import ( MCPResourceWrapper, MCPToolWrapper, _normalize_windows_stdio_command, + _sanitize_name, connect_mcp_servers, ) from nanobot.agent.tools.registry import ToolRegistry @@ -798,3 +799,114 @@ async def test_connect_registers_resources_and_prompts( assert "mcp_test_tool_a" in registry.tool_names assert "mcp_test_resource_res_b" in registry.tool_names assert "mcp_test_prompt_prompt_c" in registry.tool_names + + +# --------------------------------------------------------------------------- +# _sanitize_name tests +# --------------------------------------------------------------------------- + + +def test_sanitize_name_replaces_spaces() -> None: + assert _sanitize_name("PostgreSQL System Information") == "PostgreSQL_System_Information" + + +def test_sanitize_name_replaces_special_characters() -> None: + assert _sanitize_name("foo.bar@baz!") == "foo_bar_baz_" + + +def test_sanitize_name_collapses_consecutive_underscores() -> None: + assert _sanitize_name("a b") == "a_b" + + +def test_sanitize_name_preserves_valid_characters() -> None: + assert _sanitize_name("my-tool_v2") == "my-tool_v2" + + +def test_sanitize_name_noop_for_already_clean_names() -> None: + assert _sanitize_name("mcp_server_tool") == "mcp_server_tool" + + +# --------------------------------------------------------------------------- +# Wrapper sanitization tests +# --------------------------------------------------------------------------- + + +def test_tool_wrapper_sanitizes_name() -> None: + tool_def = SimpleNamespace( + name="My Tool", + description="tool with spaces", + inputSchema={"type": "object", "properties": {}}, + ) + wrapper = MCPToolWrapper(SimpleNamespace(call_tool=None), "srv", tool_def) + assert wrapper.name == "mcp_srv_My_Tool" + + +def test_resource_wrapper_sanitizes_name() -> None: + resource_def = SimpleNamespace( + name="PostgreSQL System Information", + uri="file:///pg/info", + description="PG info", + ) + wrapper = MCPResourceWrapper(None, "srv", resource_def) + assert wrapper.name == "mcp_srv_resource_PostgreSQL_System_Information" + + +def test_prompt_wrapper_sanitizes_name() -> None: + prompt_def = SimpleNamespace( + name="design-schema", + description="Design schema", + arguments=None, + ) + # Hyphens are allowed, so this should pass through unchanged + wrapper = MCPPromptWrapper(None, "my server", prompt_def) + assert wrapper.name == "mcp_my_server_prompt_design-schema" + + +def test_tool_wrapper_preserves_original_name_for_mcp_call() -> None: + tool_def = SimpleNamespace( + name="My Tool", + description="tool with spaces", + inputSchema={"type": "object", "properties": {}}, + ) + wrapper = MCPToolWrapper(SimpleNamespace(call_tool=None), "srv", tool_def) + # The sanitized API-facing name differs from the original MCP name + assert wrapper.name == "mcp_srv_My_Tool" + assert wrapper._original_name == "My Tool" + + +@pytest.mark.asyncio +async def test_connect_mcp_servers_sanitizes_resource_names( + fake_mcp_runtime: dict[str, object | None], +) -> None: + fake_mcp_runtime["session"] = _make_fake_session_with_capabilities( + tool_names=[], + resource_names=["PostgreSQL System Information"], + prompt_names=[], + ) + registry = ToolRegistry() + stacks = await connect_mcp_servers( + {"test": MCPServerConfig(command="fake")}, + registry, + ) + for stack in stacks.values(): + await stack.aclose() + + assert "mcp_test_resource_PostgreSQL_System_Information" in registry.tool_names + + +@pytest.mark.asyncio +async def test_connect_mcp_servers_enabled_tools_matches_sanitized_name( + fake_mcp_runtime: dict[str, object | None], +) -> None: + fake_mcp_runtime["session"] = _make_fake_session_with_capabilities( + tool_names=["My Tool", "other"], + ) + registry = ToolRegistry() + stacks = await connect_mcp_servers( + {"test": MCPServerConfig(command="fake", enabled_tools=["mcp_test_My_Tool"])}, + registry, + ) + for stack in stacks.values(): + await stack.aclose() + + assert registry.tool_names == ["mcp_test_My_Tool"]