refactor(agent): move usage logging to _LoopHook, simplify runner

- Runner no longer creates an intermediate iter_usage dict; it passes raw_usage directly to context.usage for hooks to consume freely.
- Usage logging moves to _LoopHook.after_iteration() alongside other product-layer concerns (progress, tool hints).
- Runner only keeps the minimal accumulation needed for AgentRunResult.
- Removes loguru import from runner.py (no longer needed).
2026-05-01 07:15:52 +00:00 · 2026-03-30 17:56:39 +08:00 · 2026-03-30 17:56:39 +08:00 · 57b46bc520
commit 57b46bc520
parent 9c869d0bdf
2 changed files with 17 additions and 22 deletions
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@ -257,6 +257,15 @@ class AgentLoop:
            def finalize_content(self, context: AgentHookContext, content: str | None) -> str | None:
                """Return ``content`` after passing it through the loop's ``_strip_think``.

                ``context`` is accepted as part of the hook signature but is not
                read here.
                """
                cleaned = loop_self._strip_think(content)
                return cleaned
            async def after_iteration(self, context: AgentHookContext) -> None:
                """Write a debug log line with the token usage carried on ``context``.

                ``context.usage`` is treated as an empty mapping when it is falsy;
                each of the three counts falls back to 0 when its key is absent.
                """
                usage_stats = context.usage
                if not usage_stats:
                    usage_stats = {}

                prompt_tokens = usage_stats.get("prompt_tokens", 0)
                completion_tokens = usage_stats.get("completion_tokens", 0)
                cached_tokens = usage_stats.get("cached_tokens", 0)

                logger.debug(
                    "LLM usage: prompt={} completion={} cached={}",
                    prompt_tokens,
                    completion_tokens,
                    cached_tokens,
                )
        result = await self.runner.run(AgentRunSpec(
            initial_messages=initial_messages,
            tools=self.tools,
--- a/nanobot/agent/runner.py
+++ b/nanobot/agent/runner.py
@ -6,8 +6,6 @@ import asyncio
 from dataclasses import dataclass, field
 from typing import Any
 from loguru import logger
 from nanobot.agent.hook import AgentHook, AgentHookContext
 from nanobot.agent.tools.registry import ToolRegistry
 from nanobot.providers.base import LLMProvider, ToolCallRequest
@ -62,7 +60,7 @@ class AgentRunner:
        messages = list(spec.initial_messages)
        final_content: str | None = None
        tools_used: list[str] = []
-        usage = {"prompt_tokens": 0, "completion_tokens": 0, "cached_tokens": 0}
+        usage: dict[str, int] = {}
        error: str | None = None
        stop_reason = "completed"
        tool_events: list[dict[str, str]] = []
@ -94,27 +92,15 @@ class AgentRunner:
                response = await self.provider.chat_with_retry(**kwargs)
            raw_usage = response.usage or {}
-            iter_usage = {
+            context.response = response
-                "prompt_tokens": int(raw_usage.get("prompt_tokens", 0) or 0),
+            context.usage = raw_usage
-                "completion_tokens": int(raw_usage.get("completion_tokens", 0) or 0),
+            context.tool_calls = list(response.tool_calls)
-            }
+            # Accumulate standard fields into result usage.
-            # Pass through cached_tokens if present.
+            usage["prompt_tokens"] = usage.get("prompt_tokens", 0) + int(raw_usage.get("prompt_tokens", 0) or 0)
            usage["completion_tokens"] = usage.get("completion_tokens", 0) + int(raw_usage.get("completion_tokens", 0) or 0)
            cached = raw_usage.get("cached_tokens")
            if cached:
-                iter_usage["cached_tokens"] = int(cached)
+                usage["cached_tokens"] = usage.get("cached_tokens", 0) + int(cached)
            usage["prompt_tokens"] += iter_usage["prompt_tokens"]
            usage["completion_tokens"] += iter_usage["completion_tokens"]
            if "cached_tokens" in iter_usage:
                usage["cached_tokens"] = usage.get("cached_tokens", 0) + iter_usage["cached_tokens"]
            context.response = response
            context.usage = iter_usage
            logger.debug(
                "LLM usage: prompt={} completion={} cached={}",
                iter_usage["prompt_tokens"],
                iter_usage["completion_tokens"],
                iter_usage.get("cached_tokens", 0),
            )
            context.tool_calls = list(response.tool_calls)
            if response.has_tool_calls:
                if hook.wants_streaming():