From bc0ff7f2143e6f07131133abbe15eb1928446fe4 Mon Sep 17 00:00:00 2001 From: whs Date: Mon, 6 Apr 2026 07:00:02 +0800 Subject: [PATCH] feat(status): add web search provider usage to /status command --- nanobot/agent/tools/search_usage.py | 183 +++++++++++++++++ nanobot/command/builtin.py | 16 ++ nanobot/utils/helpers.py | 16 +- tests/tools/__init__.py | 0 tests/tools/test_search_usage.py | 303 ++++++++++++++++++++++++++++ 5 files changed, 515 insertions(+), 3 deletions(-) create mode 100644 nanobot/agent/tools/search_usage.py create mode 100644 tests/tools/__init__.py create mode 100644 tests/tools/test_search_usage.py diff --git a/nanobot/agent/tools/search_usage.py b/nanobot/agent/tools/search_usage.py new file mode 100644 index 000000000..70fecb8c6 --- /dev/null +++ b/nanobot/agent/tools/search_usage.py @@ -0,0 +1,183 @@ +"""Web search provider usage fetchers for /status command.""" + +from __future__ import annotations + +import os +from dataclasses import dataclass +from typing import Any + + +@dataclass +class SearchUsageInfo: + """Structured usage info returned by a provider fetcher.""" + + provider: str + supported: bool = False # True if the provider has a usage API + error: str | None = None # Set when the API call failed + + # Usage counters (None = not available for this provider) + used: int | None = None + limit: int | None = None + remaining: int | None = None + reset_date: str | None = None # ISO date string, e.g. "2026-05-01" + + # Tavily-specific breakdown + search_used: int | None = None + extract_used: int | None = None + crawl_used: int | None = None + + def format(self) -> str: + """Return a human-readable multi-line string for /status output.""" + lines = [f"🔍 Web Search: {self.provider}"] + + if not self.supported: + lines.append(" Usage tracking: not available for this provider") + return "\n".join(lines) + + if self.error: + lines.append(f" Usage: unavailable ({self.error})") + return "\n".join(lines) + + if self.used is not None and self.limit is not None: + lines.append(f" Usage: {self.used} / {self.limit} requests") + elif self.used is not None: + lines.append(f" Usage: {self.used} requests") + + # Tavily breakdown + breakdown_parts = [] + if self.search_used is not None: + breakdown_parts.append(f"Search: {self.search_used}") + if self.extract_used is not None: + breakdown_parts.append(f"Extract: {self.extract_used}") + if self.crawl_used is not None: + breakdown_parts.append(f"Crawl: {self.crawl_used}") + if breakdown_parts: + lines.append(f" Breakdown: {' | '.join(breakdown_parts)}") + + if self.remaining is not None: + lines.append(f" Remaining: {self.remaining} requests") + + if self.reset_date: + lines.append(f" Resets: {self.reset_date}") + + return "\n".join(lines) + + +async def fetch_search_usage( + provider: str, + api_key: str | None = None, +) -> SearchUsageInfo: + """ + Fetch usage info for the configured web search provider. + + Args: + provider: Provider name (e.g. "tavily", "brave", "duckduckgo"). + api_key: API key for the provider (falls back to env vars). + + Returns: + SearchUsageInfo with populated fields where available. + """ + p = (provider or "duckduckgo").strip().lower() + + if p == "tavily": + return await _fetch_tavily_usage(api_key) + elif p == "brave": + return await _fetch_brave_usage(api_key) + else: + # duckduckgo, searxng, jina, unknown — no usage API + return SearchUsageInfo(provider=p, supported=False) + + +# --------------------------------------------------------------------------- +# Tavily +# --------------------------------------------------------------------------- + +async def _fetch_tavily_usage(api_key: str | None) -> SearchUsageInfo: + """Fetch usage from GET https://api.tavily.com/usage.""" + import httpx + + key = api_key or os.environ.get("TAVILY_API_KEY", "") + if not key: + return SearchUsageInfo( + provider="tavily", + supported=True, + error="TAVILY_API_KEY not configured", + ) + + try: + async with httpx.AsyncClient(timeout=8.0) as client: + r = await client.get( + "https://api.tavily.com/usage", + headers={"Authorization": f"Bearer {key}"}, + ) + r.raise_for_status() + data: dict[str, Any] = r.json() + return _parse_tavily_usage(data) + except httpx.HTTPStatusError as e: + return SearchUsageInfo( + provider="tavily", + supported=True, + error=f"HTTP {e.response.status_code}", + ) + except Exception as e: + return SearchUsageInfo( + provider="tavily", + supported=True, + error=str(e)[:80], + ) + + +def _parse_tavily_usage(data: dict[str, Any]) -> SearchUsageInfo: + """ + Parse Tavily /usage response. + + Expected shape (may vary by plan): + { + "used": 142, + "limit": 1000, + "remaining": 858, + "reset_date": "2026-05-01", + "breakdown": { + "search": 120, + "extract": 15, + "crawl": 7 + } + } + """ + used = data.get("used") + limit = data.get("limit") + remaining = data.get("remaining") + reset_date = data.get("reset_date") or data.get("resetDate") + + # Compute remaining if not provided + if remaining is None and used is not None and limit is not None: + remaining = max(0, limit - used) + + breakdown = data.get("breakdown") or {} + search_used = breakdown.get("search") + extract_used = breakdown.get("extract") + crawl_used = breakdown.get("crawl") + + return SearchUsageInfo( + provider="tavily", + supported=True, + used=used, + limit=limit, + remaining=remaining, + reset_date=str(reset_date) if reset_date else None, + search_used=search_used, + extract_used=extract_used, + crawl_used=crawl_used, + ) + + +# --------------------------------------------------------------------------- +# Brave +# --------------------------------------------------------------------------- + +async def _fetch_brave_usage(api_key: str | None) -> SearchUsageInfo: + """ + Brave Search does not have a public usage/quota endpoint. + Rate-limit headers are returned per-request, not queryable standalone. + """ + return SearchUsageInfo(provider="brave", supported=False) diff --git a/nanobot/command/builtin.py b/nanobot/command/builtin.py index 514ac1438..81623ebd5 100644 --- a/nanobot/command/builtin.py +++ b/nanobot/command/builtin.py @@ -60,6 +60,21 @@ async def cmd_status(ctx: CommandContext) -> OutboundMessage: pass if ctx_est <= 0: ctx_est = loop._last_usage.get("prompt_tokens", 0) + + # Fetch web search provider usage (best-effort, never blocks the response) + search_usage_text: str | None = None + try: + from nanobot.agent.tools.search_usage import fetch_search_usage + web_cfg = getattr(getattr(loop, "config", None), "tools", None) + web_cfg = getattr(web_cfg, "web", None) if web_cfg else None + search_cfg = getattr(web_cfg, "search", None) if web_cfg else None + if search_cfg is not None: + provider = getattr(search_cfg, "provider", "duckduckgo") + api_key = getattr(search_cfg, "api_key", "") or None + usage = await fetch_search_usage(provider=provider, api_key=api_key) + search_usage_text = usage.format() + except Exception: + pass # Never let usage fetch break /status return OutboundMessage( channel=ctx.msg.channel, chat_id=ctx.msg.chat_id, @@ -69,6 +84,7 @@ async def cmd_status(ctx: CommandContext) -> OutboundMessage: context_window_tokens=loop.context_window_tokens, session_msg_count=len(session.get_history(max_messages=0)), context_tokens_estimate=ctx_est, + search_usage_text=search_usage_text, ), metadata={**dict(ctx.msg.metadata or {}), "render_as": "text"}, ) diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py index 93293c9e0..7267bac2a 100644 --- a/nanobot/utils/helpers.py +++ b/nanobot/utils/helpers.py @@ -396,8 +396,15 @@ def build_status_content( context_window_tokens: int, session_msg_count: int, context_tokens_estimate: int, + search_usage_text: str | None = None, ) -> str: - """Build a human-readable runtime status snapshot.""" + """Build a human-readable runtime status snapshot. + + Args: + search_usage_text: Optional pre-formatted web search usage string + (produced by SearchUsageInfo.format()). When provided + it is appended as an extra section. + """ uptime_s = int(time.time() - start_time) uptime = ( f"{uptime_s // 3600}h {(uptime_s % 3600) // 60}m" @@ -414,14 +421,17 @@ def build_status_content( token_line = f"\U0001f4ca Tokens: {last_in} in / {last_out} out" if cached and last_in: token_line += f" ({cached * 100 // last_in}% cached)" - return "\n".join([ + lines = [ f"\U0001f408 nanobot v{version}", f"\U0001f9e0 Model: {model}", token_line, f"\U0001f4da Context: {ctx_used_str}/{ctx_total_str} ({ctx_pct}%)", f"\U0001f4ac Session: {session_msg_count} messages", f"\u23f1 Uptime: {uptime}", - ]) + ] + if search_usage_text: + lines.append(search_usage_text) + return "\n".join(lines) def sync_workspace_templates(workspace: Path, silent: bool = False) -> list[str]: diff --git a/tests/tools/__init__.py b/tests/tools/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/tools/test_search_usage.py b/tests/tools/test_search_usage.py new file mode 100644 index 000000000..faec41dfa --- /dev/null +++ b/tests/tools/test_search_usage.py @@ -0,0 +1,303 @@ +"""Tests for web search provider usage fetching and /status integration.""" + +from __future__ import annotations + +import pytest +from unittest.mock import AsyncMock, MagicMock, patch + +from nanobot.agent.tools.search_usage import ( + SearchUsageInfo, + _parse_tavily_usage, + fetch_search_usage, +) +from nanobot.utils.helpers import build_status_content + + +# --------------------------------------------------------------------------- +# SearchUsageInfo.format() tests +# --------------------------------------------------------------------------- + +class TestSearchUsageInfoFormat: + def test_unsupported_provider_shows_no_tracking(self): + info = SearchUsageInfo(provider="duckduckgo", supported=False) + text = info.format() + assert "duckduckgo" in text + assert "not available" in text + + def test_supported_with_error(self): + info = SearchUsageInfo(provider="tavily", supported=True, error="HTTP 401") + text = info.format() + assert "tavily" in text + assert "HTTP 401" in text + assert "unavailable" in text + + def test_full_tavily_usage(self): + info = SearchUsageInfo( + provider="tavily", + supported=True, + used=142, + limit=1000, + remaining=858, + reset_date="2026-05-01", + search_used=120, + extract_used=15, + crawl_used=7, + ) + text = info.format() + assert "tavily" in text + assert "142 / 1000" in text + assert "858" in text + assert "2026-05-01" in text + assert "Search: 120" in text + assert "Extract: 15" in text + assert "Crawl: 7" in text + + def test_usage_without_limit(self): + info = SearchUsageInfo(provider="tavily", supported=True, used=50) + text = info.format() + assert "50 requests" in text + assert "/" not in text.split("Usage:")[1].split("\n")[0] + + def test_no_breakdown_when_none(self): + info = SearchUsageInfo( + provider="tavily", supported=True, used=10, limit=100, remaining=90 + ) + text = info.format() + assert "Breakdown" not in text + + def test_brave_unsupported(self): + info = SearchUsageInfo(provider="brave", supported=False) + text = info.format() + assert "brave" in text + assert "not available" in text + + +# --------------------------------------------------------------------------- +# _parse_tavily_usage tests +# --------------------------------------------------------------------------- + +class TestParseTavilyUsage: + def test_full_response(self): + data = { + "used": 142, + "limit": 1000, + "remaining": 858, + "reset_date": "2026-05-01", + "breakdown": {"search": 120, "extract": 15, "crawl": 7}, + } + info = _parse_tavily_usage(data) + assert info.provider == "tavily" + assert info.supported is True + assert info.used == 142 + assert info.limit == 1000 + assert info.remaining == 858 + assert info.reset_date == "2026-05-01" + assert info.search_used == 120 + assert info.extract_used == 15 + assert info.crawl_used == 7 + + def test_remaining_computed_when_missing(self): + data = {"used": 300, "limit": 1000} + info = _parse_tavily_usage(data) + assert info.remaining == 700 + + def test_remaining_not_negative(self): + data = {"used": 1100, "limit": 1000} + info = _parse_tavily_usage(data) + assert info.remaining == 0 + + def test_camel_case_reset_date(self): + data = {"used": 10, "limit": 100, "resetDate": "2026-06-01"} + info = _parse_tavily_usage(data) + assert info.reset_date == "2026-06-01" + + def test_empty_response(self): + info = _parse_tavily_usage({}) + assert info.provider == "tavily" + assert info.supported is True + assert info.used is None + assert info.limit is None + + def test_no_breakdown_key(self): + data = {"used": 5, "limit": 50} + info = _parse_tavily_usage(data) + assert info.search_used is None + assert info.extract_used is None + assert info.crawl_used is None + + +# --------------------------------------------------------------------------- +# fetch_search_usage routing tests +# --------------------------------------------------------------------------- + +class TestFetchSearchUsageRouting: + @pytest.mark.asyncio + async def test_duckduckgo_returns_unsupported(self): + info = await fetch_search_usage("duckduckgo") + assert info.provider == "duckduckgo" + assert info.supported is False + + @pytest.mark.asyncio + async def test_searxng_returns_unsupported(self): + info = await fetch_search_usage("searxng") + assert info.supported is False + + @pytest.mark.asyncio + async def test_jina_returns_unsupported(self): + info = await fetch_search_usage("jina") + assert info.supported is False + + @pytest.mark.asyncio + async def test_brave_returns_unsupported(self): + info = await fetch_search_usage("brave") + assert info.provider == "brave" + assert info.supported is False + + @pytest.mark.asyncio + async def test_unknown_provider_returns_unsupported(self): + info = await fetch_search_usage("some_unknown_provider") + assert info.supported is False + + @pytest.mark.asyncio + async def test_tavily_no_api_key_returns_error(self): + with patch.dict("os.environ", {}, clear=True): + # Ensure TAVILY_API_KEY is not set + import os + os.environ.pop("TAVILY_API_KEY", None) + info = await fetch_search_usage("tavily", api_key=None) + assert info.provider == "tavily" + assert info.supported is True + assert info.error is not None + assert "not configured" in info.error + + @pytest.mark.asyncio + async def test_tavily_success(self): + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "used": 142, + "limit": 1000, + "remaining": 858, + "reset_date": "2026-05-01", + "breakdown": {"search": 120, "extract": 15, "crawl": 7}, + } + mock_response.raise_for_status = MagicMock() + + mock_client = AsyncMock() + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=False) + mock_client.get = AsyncMock(return_value=mock_response) + + with patch("httpx.AsyncClient", return_value=mock_client): + info = await fetch_search_usage("tavily", api_key="test-key") + + assert info.provider == "tavily" + assert info.supported is True + assert info.error is None + assert info.used == 142 + assert info.limit == 1000 + assert info.remaining == 858 + assert info.reset_date == "2026-05-01" + assert info.search_used == 120 + + @pytest.mark.asyncio + async def test_tavily_http_error(self): + import httpx + + mock_response = MagicMock() + mock_response.status_code = 401 + mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( + "401", request=MagicMock(), response=mock_response + ) + + mock_client = AsyncMock() + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=False) + mock_client.get = AsyncMock(return_value=mock_response) + + with patch("httpx.AsyncClient", return_value=mock_client): + info = await fetch_search_usage("tavily", api_key="bad-key") + + assert info.supported is True + assert info.error == "HTTP 401" + + @pytest.mark.asyncio + async def test_tavily_network_error(self): + import httpx + + mock_client = AsyncMock() + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=False) + mock_client.get = AsyncMock(side_effect=httpx.ConnectError("timeout")) + + with patch("httpx.AsyncClient", return_value=mock_client): + info = await fetch_search_usage("tavily", api_key="test-key") + + assert info.supported is True + assert info.error is not None + + @pytest.mark.asyncio + async def test_provider_name_case_insensitive(self): + info = await fetch_search_usage("Tavily", api_key=None) + assert info.provider == "tavily" + assert info.supported is True + + +# --------------------------------------------------------------------------- +# build_status_content integration tests +# --------------------------------------------------------------------------- + +class TestBuildStatusContentWithSearchUsage: + _BASE_KWARGS = dict( + version="0.1.0", + model="claude-opus-4-5", + start_time=1_000_000.0, + last_usage={"prompt_tokens": 1000, "completion_tokens": 200}, + context_window_tokens=65536, + session_msg_count=5, + context_tokens_estimate=3000, + ) + + def test_no_search_usage_unchanged(self): + """Omitting search_usage_text keeps existing behaviour.""" + content = build_status_content(**self._BASE_KWARGS) + assert "🔍" not in content + assert "Web Search" not in content + + def test_search_usage_none_unchanged(self): + content = build_status_content(**self._BASE_KWARGS, search_usage_text=None) + assert "🔍" not in content + + def test_search_usage_appended(self): + usage_text = "🔍 Web Search: tavily\n Usage: 142 / 1000 requests" + content = build_status_content(**self._BASE_KWARGS, search_usage_text=usage_text) + assert "🔍 Web Search: tavily" in content + assert "142 / 1000" in content + + def test_existing_fields_still_present(self): + usage_text = "🔍 Web Search: duckduckgo\n Usage tracking: not available" + content = build_status_content(**self._BASE_KWARGS, search_usage_text=usage_text) + # Original fields must still be present + assert "nanobot v0.1.0" in content + assert "claude-opus-4-5" in content + assert "1000 in / 200 out" in content + # New field appended + assert "duckduckgo" in content + + def test_full_tavily_in_status(self): + info = SearchUsageInfo( + provider="tavily", + supported=True, + used=142, + limit=1000, + remaining=858, + reset_date="2026-05-01", + search_used=120, + extract_used=15, + crawl_used=7, + ) + content = build_status_content(**self._BASE_KWARGS, search_usage_text=info.format()) + assert "142 / 1000" in content + assert "858" in content + assert "2026-05-01" in content + assert "Search: 120" in content