mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-19 16:12:30 +00:00
Reasoning now flows as its own stream — symmetric to the answer's ``delta`` / ``stream_end`` pair — instead of being shipped as one oversized progress message. This lets WebUI render a live "Thinking…" bubble that updates in place, then auto-collapses when the stream closes. Other channels remain plugin no-ops by default.

## Protocol

New metadata: ``_reasoning_delta`` (chunk) and ``_reasoning_end`` (close marker). ChannelManager routes both to the dedicated plugin hooks below; the legacy one-shot ``_reasoning`` is kept for back-compat and BaseChannel expands it into a single delta + end pair so plugins only ever implement the streaming primitives.

WebSocket emits two new events:

- ``reasoning_delta`` (event, chat_id, text, optional stream_id)
- ``reasoning_end`` (event, chat_id, optional stream_id)

## BaseChannel surface

- ``send_reasoning_delta(chat_id, delta, metadata)`` — no-op default
- ``send_reasoning_end(chat_id, metadata)`` — no-op default
- ``send_reasoning(msg)`` — back-compat wrapper, base impl forwards to the streaming primitives

A channel adds reasoning support by overriding the two streaming primitives. Telegram / Slack / Discord / Feishu / WeChat / Matrix keep the base no-ops until their bubble UIs are adapted; reasoning is silently dropped at dispatch and never surfaces as a stray text message.

## AgentHook

Adds ``emit_reasoning_end`` to the hook lifecycle. ``_LoopHook`` tracks whether a reasoning segment is open and closes it on:

- the first answer delta arriving (so the UI locks the bubble before the answer renders below),
- ``on_stream_end``,
- one-shot ``reasoning_content`` / ``thinking_blocks`` after a single non-streaming response.

## WebUI

- ``UIMessage.reasoning`` is now a single accumulated string with a companion ``reasoningStreaming`` flag.
- ``useNanobotStream`` consumes ``reasoning_delta`` / ``reasoning_end``; legacy ``kind: "reasoning"`` is auto-translated to a delta + end.
- New ``ReasoningBubble``: shimmer header + auto-expanded while streaming, collapses to a clickable "Thinking" pill once closed, respects ``prefers-reduced-motion``.
- Answer deltas adopt the reasoning placeholder so the bubble and the answer share one assistant row.

## Tests

- ``tests/channels/test_channel_manager_reasoning.py`` — manager routes delta + end, drops on channel opt-out, expands one-shot back-compat.
- ``tests/channels/test_websocket_channel.py`` — new ``reasoning_delta`` / ``reasoning_end`` frames, empty-chunk safety, no-subscriber safety, back-compat expansion.
- ``tests/agent/test_runner_reasoning.py`` — runner closes the segment on streaming answer start and after one-shot reasoning.
- WebUI ``useNanobotStream`` + ``message-bubble`` cover the new protocol and the shimmer styling.

## Docs

``docs/configuration.md`` and ``docs/websocket.md`` document the new events and the plugin contract.

Co-authored-by: Cursor <cursoragent@cursor.com>
322 lines
11 KiB
Python
322 lines
11 KiB
Python
"""Tests for AgentRunner reasoning extraction and emission.

Covers the three sources of model reasoning (dedicated ``reasoning_content``,
Anthropic ``thinking_blocks``, inline ``<think>``/``<thought>`` tags) plus
the streaming interaction: reasoning and answer streams are independent
channels, gated by ``context.streamed_reasoning`` rather than
``context.streamed_content``.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from unittest.mock import AsyncMock, MagicMock
|
|
|
|
import pytest
|
|
|
|
from nanobot.agent.hook import AgentHook
|
|
from nanobot.config.schema import AgentDefaults
|
|
from nanobot.providers.base import LLMResponse, ToolCallRequest
|
|
|
|
# Tool-result character cap read from a default-constructed AgentDefaults;
# the tests below pass it to AgentRunSpec as ``max_tool_result_chars``.
_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
|
|
|
|
|
|
class _RecordingHook(AgentHook):
    """Hook test double that records the reasoning callbacks it receives.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self) -> None:
        super().__init__()
        # Non-empty reasoning payloads, in the order they were emitted.
        self.emitted: list[str] = []
        # Number of times ``emit_reasoning_end`` has been invoked.
        self.end_calls = 0

    async def emit_reasoning(self, reasoning_content: str | None) -> None:
        """Record *reasoning_content*, ignoring ``None`` and empty strings."""
        if not reasoning_content:
            return
        self.emitted.append(reasoning_content)

    async def emit_reasoning_end(self) -> None:
        """Count one end-of-reasoning notification."""
        self.end_calls = self.end_calls + 1
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_runner_preserves_reasoning_fields_in_assistant_history():
    """The assistant turn replayed to the provider keeps its reasoning fields.

    The first provider reply carries a tool call together with
    ``reasoning_content`` and ``thinking_blocks``; the message list handed to
    the second provider call must still expose both on that assistant turn.
    """
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    second_call_messages: list[dict] = []
    calls_made = 0

    async def chat_with_retry(*, messages, **kwargs):
        nonlocal calls_made
        calls_made += 1
        if calls_made != 1:
            # Snapshot what the runner sends on the follow-up request.
            second_call_messages[:] = messages
            return LLMResponse(content="done", tool_calls=[], usage={})
        list_dir_call = ToolCallRequest(
            id="call_1",
            name="list_dir",
            arguments={"path": "."},
        )
        return LLMResponse(
            content="thinking",
            tool_calls=[list_dir_call],
            reasoning_content="hidden reasoning",
            thinking_blocks=[{"type": "thinking", "thinking": "step"}],
            usage={"prompt_tokens": 5, "completion_tokens": 3},
        )

    provider = MagicMock()
    provider.chat_with_retry = chat_with_retry
    tools = MagicMock()
    tools.get_definitions.return_value = []
    tools.execute = AsyncMock(return_value="tool result")

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[
            {"role": "system", "content": "system"},
            {"role": "user", "content": "do task"},
        ],
        tools=tools,
        model="test-model",
        max_iterations=3,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    )
    result = await runner.run(spec)

    assert result.final_content == "done"

    # Only the assistant turn that issued the tool call is of interest.
    tool_call_turns = []
    for entry in second_call_messages:
        if entry.get("role") == "assistant" and entry.get("tool_calls"):
            tool_call_turns.append(entry)
    assert len(tool_call_turns) == 1

    replayed = tool_call_turns[0]
    assert replayed["reasoning_content"] == "hidden reasoning"
    assert replayed["thinking_blocks"] == [{"type": "thinking", "thinking": "step"}]
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_runner_emits_anthropic_thinking_blocks():
    """Anthropic ``thinking_blocks`` reach the hook as one reasoning emission
    that contains the text of every block."""
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    async def chat_with_retry(**kwargs):
        first_block = {
            "type": "thinking",
            "thinking": "Let me analyze this step by step.",
            "signature": "sig1",
        }
        second_block = {
            "type": "thinking",
            "thinking": "After careful consideration.",
            "signature": "sig2",
        }
        return LLMResponse(
            content="The answer is 42.",
            thinking_blocks=[first_block, second_block],
            tool_calls=[],
            usage={"prompt_tokens": 5, "completion_tokens": 3},
        )

    provider = MagicMock()
    provider.chat_with_retry = chat_with_retry
    tools = MagicMock()
    tools.get_definitions.return_value = []

    hook = _RecordingHook()
    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "question"}],
        tools=tools,
        model="test-model",
        max_iterations=3,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        hook=hook,
    )
    result = await runner.run(spec)

    assert result.final_content == "The answer is 42."
    assert len(hook.emitted) == 1

    # Both blocks must be present in the single emitted payload.
    reasoning_text = hook.emitted[0]
    assert "Let me analyze this" in reasoning_text
    assert "After careful consideration" in reasoning_text
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_runner_emits_inline_think_content_as_reasoning():
    """Reasoning embedded in a ``<think>...</think>`` block is emitted to the
    hook and removed from the final answer text."""
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    async def chat_with_retry(**kwargs):
        raw_content = (
            "<think>Let me think about this...\nThe answer is 42.</think>"
            "The answer is 42."
        )
        return LLMResponse(
            content=raw_content,
            tool_calls=[],
            usage={"prompt_tokens": 5, "completion_tokens": 3},
        )

    provider = MagicMock()
    provider.chat_with_retry = chat_with_retry
    tools = MagicMock()
    tools.get_definitions.return_value = []

    hook = _RecordingHook()
    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "what is the answer?"}],
        tools=tools,
        model="test-model",
        max_iterations=3,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        hook=hook,
    )
    result = await runner.run(spec)

    # The visible answer no longer carries the <think> block ...
    assert result.final_content == "The answer is 42."
    # ... and its content was delivered exactly once as reasoning.
    assert len(hook.emitted) == 1
    reasoning_text = hook.emitted[0]
    assert "Let me think about this" in reasoning_text
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_runner_prefers_reasoning_content_over_inline_think():
    """When a response has both a dedicated ``reasoning_content`` field and an
    inline ``<think>`` block, only the dedicated field is emitted, while the
    inline block is still stripped from the answer."""
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    async def chat_with_retry(**kwargs):
        return LLMResponse(
            content="<think>inline thinking</think>The answer.",
            reasoning_content="dedicated reasoning field",
            tool_calls=[],
            usage={"prompt_tokens": 5, "completion_tokens": 3},
        )

    provider = MagicMock()
    provider.chat_with_retry = chat_with_retry
    tools = MagicMock()
    tools.get_definitions.return_value = []

    hook = _RecordingHook()
    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "question"}],
        tools=tools,
        model="test-model",
        max_iterations=3,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        hook=hook,
    )
    result = await runner.run(spec)

    assert result.final_content == "The answer."
    assert hook.emitted == ["dedicated reasoning field"]
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_runner_emits_reasoning_content_even_when_answer_was_streamed():
    """Streaming the answer must not suppress ``reasoning_content``.

    The stub provider pushes the answer through ``on_content_delta`` and only
    attaches ``reasoning_content`` to the final response; the hook must still
    receive that reasoning.
    """
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    async def chat_stream_with_retry(*, on_content_delta=None, **kwargs):
        if on_content_delta:
            await on_content_delta("The ")
            await on_content_delta("answer.")
        return LLMResponse(
            content="The answer.",
            reasoning_content="step-by-step deduction",
            tool_calls=[],
            usage={"prompt_tokens": 5, "completion_tokens": 3},
        )

    provider = MagicMock()
    provider.supports_progress_deltas = True
    provider.chat_stream_with_retry = chat_stream_with_retry
    tools = MagicMock()
    tools.get_definitions.return_value = []

    streamed_chunks: list[str] = []

    async def _progress(content: str, **_kwargs):
        streamed_chunks.append(content)

    hook = _RecordingHook()
    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "question"}],
        tools=tools,
        model="test-model",
        max_iterations=3,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        hook=hook,
        stream_progress_deltas=True,
        progress_callback=_progress,
    )
    result = await runner.run(spec)

    assert result.final_content == "The answer."
    assert streamed_chunks, "answer should have streamed via progress callback"
    assert hook.emitted == ["step-by-step deduction"]
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_runner_does_not_double_emit_when_inline_think_already_streamed():
    """An inline ``<think>`` block that arrived through the content stream is
    emitted once; the final response repeating it must not emit it again."""
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    async def chat_stream_with_retry(*, on_content_delta=None, **kwargs):
        if on_content_delta:
            await on_content_delta("<think>working...</think>")
            await on_content_delta("The answer.")
        return LLMResponse(
            content="<think>working...</think>The answer.",
            tool_calls=[],
            usage={"prompt_tokens": 5, "completion_tokens": 3},
        )

    provider = MagicMock()
    provider.supports_progress_deltas = True
    provider.chat_stream_with_retry = chat_stream_with_retry
    tools = MagicMock()
    tools.get_definitions.return_value = []

    async def _progress(content: str, **_kwargs):
        # Progress output is irrelevant here; accept and discard it.
        return None

    hook = _RecordingHook()
    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "question"}],
        tools=tools,
        model="test-model",
        max_iterations=3,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        hook=hook,
        stream_progress_deltas=True,
        progress_callback=_progress,
    )
    result = await runner.run(spec)

    assert result.final_content == "The answer."
    assert hook.emitted == ["working..."]
    assert hook.end_calls >= 1, "reasoning stream must be closed once the answer starts"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_runner_closes_reasoning_stream_after_one_shot_response():
    """A non-streaming reply with ``reasoning_content`` yields one reasoning
    emission followed by exactly one end marker on the hook."""
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    async def chat_with_retry(**kwargs):
        return LLMResponse(
            content="answer",
            reasoning_content="hidden thought",
            tool_calls=[],
            usage={"prompt_tokens": 5, "completion_tokens": 3},
        )

    provider = MagicMock()
    provider.chat_with_retry = chat_with_retry
    tools = MagicMock()
    tools.get_definitions.return_value = []

    hook = _RecordingHook()
    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "q"}],
        tools=tools,
        model="test-model",
        max_iterations=3,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        hook=hook,
    )
    result = await runner.run(spec)

    assert result.final_content == "answer"
    assert hook.emitted == ["hidden thought"]
    assert hook.end_calls == 1
|