feat: add ConversationCallback for LiteLLM tracing

This commit is contained in:
chengyongru 2026-03-19 22:39:48 +08:00
parent 5fd66cae5c
commit 9f0ce3924d
8 changed files with 401 additions and 6 deletions

View File

@ -212,6 +212,7 @@ class AgentLoop:
self,
initial_messages: list[dict],
on_progress: Callable[..., Awaitable[None]] | None = None,
metadata: dict[str, Any] | None = None,
) -> tuple[str | None, list[str], list[dict]]:
"""Run the agent iteration loop."""
messages = initial_messages
@ -228,6 +229,7 @@ class AgentLoop:
messages=messages,
tools=tool_defs,
model=self.model,
metadata=metadata,
)
usage = response.usage or {}
self._last_usage = {
@ -419,7 +421,9 @@ class AgentLoop:
current_message=msg.content, channel=channel, chat_id=chat_id,
current_role=current_role,
)
final_content, _, all_msgs = await self._run_agent_loop(messages)
final_content, _, all_msgs = await self._run_agent_loop(
messages, metadata={"session_key": key},
)
self._save_turn(session, all_msgs, 1 + len(history))
self.sessions.save(session)
self._schedule_background(self.memory_consolidator.maybe_consolidate_by_tokens(session))
@ -487,6 +491,7 @@ class AgentLoop:
final_content, _, all_msgs = await self._run_agent_loop(
initial_messages, on_progress=on_progress or _bus_progress,
metadata={"session_key": key},
)
if final_content is None:

View File

@ -15,7 +15,7 @@ from nanobot.agent.tools.shell import ExecTool
from nanobot.agent.tools.web import WebFetchTool, WebSearchTool
from nanobot.bus.events import InboundMessage
from nanobot.bus.queue import MessageBus
from nanobot.config.schema import ExecToolConfig
from nanobot.config.schema import ExecToolConfig, WebSearchConfig
from nanobot.providers.base import LLMProvider
from nanobot.utils.helpers import build_assistant_message
@ -29,13 +29,11 @@ class SubagentManager:
workspace: Path,
bus: MessageBus,
model: str | None = None,
web_search_config: "WebSearchConfig | None" = None,
web_search_config: WebSearchConfig | None = None,
web_proxy: str | None = None,
exec_config: "ExecToolConfig | None" = None,
restrict_to_workspace: bool = False,
):
from nanobot.config.schema import ExecToolConfig, WebSearchConfig
self.provider = provider
self.workspace = workspace
self.bus = bus
@ -113,6 +111,14 @@ class SubagentManager:
{"role": "user", "content": task},
]
# Build metadata for tracing
session_key = f"{origin['channel']}:{origin['chat_id']}"
metadata = {
"session_key": session_key,
"agent_type": "subagent",
"task_id": task_id,
}
# Run agent loop (limited iterations)
max_iterations = 15
iteration = 0
@ -125,6 +131,7 @@ class SubagentManager:
messages=messages,
tools=tools.get_definitions(),
model=self.model,
metadata=metadata,
)
if response.has_tool_calls:

View File

@ -442,6 +442,7 @@ def _make_provider(config: Config):
default_model=model,
)
else:
from nanobot.providers.callbacks import ConversationCallback
from nanobot.providers.litellm_provider import LiteLLMProvider
from nanobot.providers.registry import find_by_name
spec = find_by_name(provider_name)
@ -449,12 +450,23 @@ def _make_provider(config: Config):
console.print("[red]Error: No API key configured.[/red]")
console.print("Set one in ~/.nanobot/config.json under providers section")
raise typer.Exit(1)
# Create callback if tracing is enabled
callback = None
if config.tracing.enabled:
jsonl_path = config.tracing.jsonl_path
# Resolve relative paths relative to workspace
if jsonl_path:
jsonl_path = str(config.workspace_path / jsonl_path)
callback = ConversationCallback(jsonl_path=jsonl_path)
provider = LiteLLMProvider(
api_key=p.api_key if p else None,
api_base=config.get_api_base(model),
default_model=model,
extra_headers=p.extra_headers if p else None,
provider_name=provider_name,
conversation_callback=callback,
)
defaults = config.agents.defaults

View File

@ -143,6 +143,17 @@ class ToolsConfig(Base):
mcp_servers: dict[str, MCPServerConfig] = Field(default_factory=dict)
class TracingConfig(Base):
    """Configuration for LLM call tracing."""

    # Master switch: when False, no ConversationCallback is attached to the provider.
    enabled: bool = True
    # Optional JSONL trace file; relative paths are resolved against the
    # workspace by the provider factory before the callback is created.
    jsonl_path: str | None = None  # e.g., "logs/traces.jsonl"
    # LangSmith integration (built-in LiteLLM support)
    langsmith_enabled: bool = False  # Override env var detection
    langsmith_project: str = "nanobot"
class Config(BaseSettings):
"""Root configuration for nanobot."""
@ -151,6 +162,7 @@ class Config(BaseSettings):
providers: ProvidersConfig = Field(default_factory=ProvidersConfig)
gateway: GatewayConfig = Field(default_factory=GatewayConfig)
tools: ToolsConfig = Field(default_factory=ToolsConfig)
tracing: TracingConfig = Field(default_factory=TracingConfig)
@property
def workspace_path(self) -> Path:

View File

@ -232,6 +232,7 @@ class LLMProvider(ABC):
temperature: object = _SENTINEL,
reasoning_effort: object = _SENTINEL,
tool_choice: str | dict[str, Any] | None = None,
metadata: dict[str, Any] | None = None,
) -> LLMResponse:
"""Call chat() with retry on transient provider failures.
@ -250,6 +251,7 @@ class LLMProvider(ABC):
messages=messages, tools=tools, model=model,
max_tokens=max_tokens, temperature=temperature,
reasoning_effort=reasoning_effort, tool_choice=tool_choice,
metadata=metadata,
)
for attempt, delay in enumerate(self._CHAT_RETRY_DELAYS, start=1):

View File

@ -0,0 +1,191 @@
"""LiteLLM callbacks for conversation tracing.
This module provides a non-invasive way to capture complete LLM conversation
traces, including agent and subagent trajectories, without modifying core
agent loop logic.
The callback receives kwargs["messages"] which contains the FULL conversation
history sent to the LLM - this is exactly what we need for trajectory persistence.
Reference: https://docs.litellm.ai/docs/observability/custom_callback
"""
import asyncio
import json
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any
from litellm.integrations.custom_logger import CustomLogger
from loguru import logger
def _calc_duration_ms(start_time: Any, end_time: Any) -> int:
"""Calculate duration in milliseconds, handling both float and datetime inputs."""
try:
diff = end_time - start_time
if isinstance(diff, timedelta):
return int(diff.total_seconds() * 1000)
return int(diff * 1000)
except (TypeError, AttributeError):
return 0
class ConversationCallback(CustomLogger):
    """LiteLLM callback for tracking full conversation traces.

    Captures complete LLM call context including:
    - kwargs["messages"]: Full conversation history (user + assistant + tool)
    - Response content, model, usage stats
    - Session metadata via kwargs["litellm_params"]["metadata"]

    This enables trajectory persistence without invasive modifications to
    agent loop or subagent code.

    Usage:
        callback = ConversationCallback(jsonl_path=Path("traces.jsonl"))

        # Register with LiteLLM (in LiteLLMProvider.__init__):
        litellm.callbacks = [callback]

        # Or pass via kwargs (current pattern in LiteLLMProvider.chat):
        kwargs.setdefault("callbacks", []).append(callback)

    Metadata Fields (passed via litellm_params.metadata):
        - session_key: Session identifier (e.g., "cli:direct", "subagent:abc123")
        - agent_type: "main" or "subagent"
        - parent_session: For subagents, the parent session key
        - task_id: For subagents, the task identifier
        - turn_count: Current turn number in the conversation
    """

    def __init__(self, jsonl_path: str | Path | None = None):
        """
        Args:
            jsonl_path: Optional path of the JSONL trace file. When None,
                events are still logged via loguru but nothing is persisted.
        """
        super().__init__()
        self.jsonl_path = Path(jsonl_path) if jsonl_path else None
        # Serializes appends so concurrent completions don't interleave lines.
        self._write_lock = asyncio.Lock()

    @staticmethod
    def _isoformat(value: Any) -> Any:
        """Return an ISO-8601 string for datetime inputs; pass others through.

        LiteLLM hands callbacks either Unix timestamps or datetime objects
        depending on version, so both must be accepted.
        """
        return value.isoformat() if isinstance(value, datetime) else value

    async def _append_jsonl(self, entry: dict[str, Any]) -> None:
        """Append a single trace entry to the JSONL file, if one is configured.

        Creates parent directories on demand; the write is guarded by an
        asyncio lock for concurrent safety. The file I/O itself is
        synchronous, but appends are small, so the event-loop stall is
        negligible.
        """
        if not self.jsonl_path:
            return
        self.jsonl_path.parent.mkdir(parents=True, exist_ok=True)
        line = json.dumps(entry, ensure_ascii=False) + "\n"
        async with self._write_lock:
            with open(self.jsonl_path, "a", encoding="utf-8") as f:
                f.write(line)

    async def async_log_success_event(
        self,
        kwargs: dict[str, Any],
        response_obj: Any,
        start_time: float,
        end_time: float,
    ) -> None:
        """Called by LiteLLM after each successful LLM completion.

        Args:
            kwargs: Contains model, messages, litellm_params (with metadata), etc.
            response_obj: The completion response object
            start_time: Unix timestamp (or datetime) of call start
            end_time: Unix timestamp (or datetime) of call end
        """
        try:
            # Extract core data
            messages = kwargs.get("messages", [])
            model = kwargs.get("model", "unknown")

            # Extract metadata (session correlation)
            litellm_params = kwargs.get("litellm_params", {})
            metadata = litellm_params.get("metadata", {})

            # Extract response content
            response_content = ""
            finish_reason = ""
            if hasattr(response_obj, "choices") and response_obj.choices:
                choice = response_obj.choices[0]
                msg = choice.message
                response_content = getattr(msg, "content", "") or ""
                finish_reason = getattr(choice, "finish_reason", "") or ""

            # Extract usage stats
            usage = {}
            if hasattr(response_obj, "usage") and response_obj.usage:
                u = response_obj.usage
                usage = {
                    "prompt_tokens": getattr(u, "prompt_tokens", 0),
                    "completion_tokens": getattr(u, "completion_tokens", 0),
                    "total_tokens": getattr(u, "total_tokens", 0),
                }

            # Extract cost and cache info (LiteLLM calculates these)
            cost = kwargs.get("response_cost", 0)
            cache_hit = kwargs.get("cache_hit", False)

            # Build trace entry
            entry = {
                "timestamp": datetime.now().isoformat(),
                "model": model,
                "messages": messages,  # FULL conversation history
                "response": response_content,
                "finish_reason": finish_reason,
                "usage": usage,
                "cost": cost,
                "cache_hit": cache_hit,
                "metadata": metadata,
                "start_time": self._isoformat(start_time),
                "end_time": self._isoformat(end_time),
                "duration_ms": _calc_duration_ms(start_time, end_time),
            }

            await self._append_jsonl(entry)

            logger.debug(
                "ConversationCallback: session={}, model={}, messages={}, cost={}",
                metadata.get("session_key", "unknown"),
                model,
                len(messages),
                cost
            )
        except Exception as e:
            # Tracing must never break the actual LLM call path.
            logger.warning("ConversationCallback error: {}", e)

    async def async_log_failure_event(
        self,
        kwargs: dict[str, Any],
        response_obj: Any,
        start_time: float,
        end_time: float,
    ) -> None:
        """Called by LiteLLM when a completion fails."""
        try:
            model = kwargs.get("model", "unknown")
            litellm_params = kwargs.get("litellm_params", {})
            metadata = litellm_params.get("metadata", {})
            exception = kwargs.get("exception", None)

            error_entry = {
                "timestamp": datetime.now().isoformat(),
                "event_type": "failure",
                "model": model,
                "metadata": metadata,
                "error": str(exception) if exception else "Unknown error",
                "error_type": type(exception).__name__ if exception else "Unknown",
                "start_time": self._isoformat(start_time),
                "end_time": self._isoformat(end_time),
                "duration_ms": _calc_duration_ms(start_time, end_time),
            }

            await self._append_jsonl(error_entry)

            logger.warning(
                "ConversationCallback failure: session={}, model={}, error={}",
                metadata.get("session_key", "unknown"),
                model,
                exception
            )
        except Exception as e:
            logger.error("ConversationCallback failure logging error: {}", e)

    def __repr__(self) -> str:
        return f"ConversationCallback(jsonl_path={self.jsonl_path})"

View File

@ -12,6 +12,7 @@ from litellm import acompletion
from loguru import logger
from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
from nanobot.providers.callbacks import ConversationCallback
from nanobot.providers.registry import find_by_model, find_gateway
# Standard chat-completion message keys.
@ -40,10 +41,12 @@ class LiteLLMProvider(LLMProvider):
default_model: str = "anthropic/claude-opus-4-5",
extra_headers: dict[str, str] | None = None,
provider_name: str | None = None,
conversation_callback: ConversationCallback | None = None,
):
super().__init__(api_key, api_base)
self.default_model = default_model
self.extra_headers = extra_headers or {}
self._conversation_callback = conversation_callback
# Detect gateway / local deployment.
# provider_name (from config key) is the primary signal;
@ -232,6 +235,7 @@ class LiteLLMProvider(LLMProvider):
temperature: float = 0.7,
reasoning_effort: str | None = None,
tool_choice: str | dict[str, Any] | None = None,
metadata: dict[str, Any] | None = None,
) -> LLMResponse:
"""
Send a chat completion request via LiteLLM.
@ -270,8 +274,18 @@ class LiteLLMProvider(LLMProvider):
# Apply model-specific overrides (e.g. kimi-k2.5 temperature)
self._apply_model_overrides(model, kwargs)
# Callbacks: custom conversation callback + LangSmith
callbacks_list = []
if self._conversation_callback:
callbacks_list.append(self._conversation_callback)
if self._langsmith_enabled:
kwargs.setdefault("callbacks", []).append("langsmith")
callbacks_list.append("langsmith")
if callbacks_list:
kwargs["callbacks"] = callbacks_list
# Pass metadata for callback correlation (session_key, agent_type, etc.)
if metadata:
kwargs["metadata"] = metadata
# Pass api_key directly — more reliable than env vars alone
if self.api_key:

View File

@ -0,0 +1,152 @@
# tests/providers/test_callbacks.py
import pytest
from unittest.mock import AsyncMock, MagicMock
from pathlib import Path
def test_callback_module_exists():
    """Importing the tracing module should expose the callback class."""
    import nanobot.providers.callbacks as callbacks_module

    assert callbacks_module.ConversationCallback is not None
def test_callback_inherits_custom_logger():
    """Verify callback follows LiteLLM's CustomLogger pattern."""
    from litellm.integrations.custom_logger import CustomLogger

    from nanobot.providers.callbacks import ConversationCallback

    callback = ConversationCallback()
    assert isinstance(callback, CustomLogger)
    # Both async hook methods must be present for LiteLLM to invoke them.
    for hook_name in ("async_log_success_event", "async_log_failure_event"):
        assert hasattr(callback, hook_name)
@pytest.mark.asyncio
async def test_async_log_success_event_extracts_full_messages():
    """Verify callback captures full message history from kwargs["messages"]."""
    from nanobot.providers.callbacks import ConversationCallback

    callback = ConversationCallback()

    # Simulate a subagent conversation with tool calls.
    conversation = [
        {"role": "system", "content": "You are a subagent..."},
        {"role": "user", "content": "Read test.txt and summarize it"},
        {
            "role": "assistant",
            "content": "I'll read the file.",
            "tool_calls": [
                {
                    "id": "call_1",
                    "type": "function",
                    "function": {"name": "read_file", "arguments": '{"path": "test.txt"}'},
                }
            ],
        },
        {"role": "tool", "tool_call_id": "call_1", "name": "read_file", "content": "Hello World"},
        {"role": "assistant", "content": "The file contains: Hello World"},
    ]
    call_kwargs = {
        "model": "anthropic/claude-opus-4-5",
        "messages": conversation,
        "litellm_params": {
            "metadata": {
                "session_key": "subagent:abc12345",
                "agent_type": "subagent",
                "parent_session": "cli:direct",
                "task_id": "abc12345",
            }
        },
        "response_cost": 0.0025,
        "cache_hit": False,
    }

    response = MagicMock()
    response.choices = [MagicMock()]
    response.choices[0].message.content = "The file contains: Hello World"
    response.choices[0].finish_reason = "stop"
    response.usage = MagicMock()
    response.usage.prompt_tokens = 150
    response.usage.completion_tokens = 20
    response.usage.total_tokens = 170

    # Should not raise
    await callback.async_log_success_event(call_kwargs, response, 0.0, 0.5)
@pytest.mark.asyncio
async def test_callback_writes_to_jsonl(tmp_path):
    """Verify callback writes trace to JSONL file."""
    import json

    from nanobot.providers.callbacks import ConversationCallback

    trace_file = tmp_path / "traces.jsonl"
    callback = ConversationCallback(jsonl_path=trace_file)

    response = MagicMock()
    response.choices = [MagicMock()]
    response.choices[0].message.content = "hello"
    response.choices[0].finish_reason = "stop"
    response.usage = MagicMock()
    response.usage.prompt_tokens = 5
    response.usage.completion_tokens = 5
    response.usage.total_tokens = 10

    await callback.async_log_success_event(
        {
            "model": "test-model",
            "messages": [{"role": "user", "content": "hi"}],
            "litellm_params": {"metadata": {"session_key": "test:123"}},
            "response_cost": 0.001,
        },
        response,
        0.0,
        0.1,
    )

    # Verify file was written with the expected first entry.
    assert trace_file.exists()
    first_line = trace_file.read_text(encoding="utf-8").splitlines()[0]
    entry = json.loads(first_line)
    assert entry["model"] == "test-model"
    assert len(entry["messages"]) == 1
    assert entry["metadata"]["session_key"] == "test:123"
@pytest.mark.asyncio
async def test_async_log_failure_event_extracts_fields():
    """Verify async_log_failure_event captures error details from kwargs."""
    from nanobot.providers.callbacks import ConversationCallback

    callback = ConversationCallback()
    failure_kwargs = {
        "model": "anthropic/claude-opus-4-5",
        "litellm_params": {
            "metadata": {"session_key": "cli:direct", "agent_type": "main"}
        },
        "exception": ValueError("Rate limit exceeded"),
    }

    # Should not raise
    await callback.async_log_failure_event(failure_kwargs, None, 0.0, 1.0)
@pytest.mark.asyncio
async def test_async_log_failure_event_writes_to_jsonl(tmp_path):
    """Verify failure event is written to JSONL with correct structure."""
    import json

    from nanobot.providers.callbacks import ConversationCallback

    trace_file = tmp_path / "traces.jsonl"
    callback = ConversationCallback(jsonl_path=trace_file)

    await callback.async_log_failure_event(
        {
            "model": "test-model",
            "litellm_params": {"metadata": {"session_key": "test:456"}},
            "exception": ValueError("API error"),
        },
        None,
        0.0,
        0.5,
    )

    # Verify file was written with the expected failure record.
    assert trace_file.exists()
    first_line = trace_file.read_text(encoding="utf-8").splitlines()[0]
    entry = json.loads(first_line)
    assert entry["event_type"] == "failure"
    assert entry["model"] == "test-model"
    assert entry["metadata"]["session_key"] == "test:456"
    assert entry["error"] == "API error"
    assert entry["error_type"] == "ValueError"
    assert "duration_ms" in entry