From d630ac90d1b88086e79ba595bedfe0abab66eb74 Mon Sep 17 00:00:00 2001 From: Flinn Xie Date: Wed, 6 May 2026 01:34:23 +0800 Subject: [PATCH 01/17] fix(cli): prevent TUI content duplication via transient Live and renderer routing Route progress output through the Live's render hook to fix cursor misalignment that caused content duplication. The root cause was that progress/reasoning output used a separate Console instance, bypassing Rich Live's process_renderables hook. Also fixes pre-existing issue where multiple headers printed per agent turn. Co-Authored-By: Claude Opus 4.7 --- nanobot/cli/commands.py | 46 ++++++++----- nanobot/cli/stream.py | 86 ++++++++++++++++-------- tests/cli/test_interactive_retry_wait.py | 2 +- 3 files changed, 88 insertions(+), 46 deletions(-) diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index 243280ed1..236d787ce 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -227,30 +227,37 @@ async def _print_interactive_response( await run_in_terminal(_write) -def _print_cli_progress_line(text: str, thinking: ThinkingSpinner | None) -> None: +def _print_cli_progress_line(text: str, thinking: ThinkingSpinner | None, renderer: StreamRenderer | None = None) -> None: """Print a CLI progress line, pausing the spinner if needed.""" if not text.strip(): return - with thinking.pause() if thinking else nullcontext(): - console.print(f" [dim]↳ {text}[/dim]") + target = renderer.console if renderer else console + pause = renderer.pause_spinner() if renderer else (thinking.pause() if thinking else nullcontext()) + with pause: + target.print(f" [dim]↳ {text}[/dim]") -async def _print_interactive_progress_line(text: str, renderer: StreamRenderer | None) -> None: - """Print an interactive progress line, pausing the renderer's spinner if needed.""" +async def _print_interactive_progress_line(text: str, thinking: ThinkingSpinner | None, renderer: StreamRenderer | None = None) -> None: + """Print an interactive progress line, 
pausing the spinner if needed.""" if not text.strip(): return - with renderer.pause() if renderer else nullcontext(): - await _print_interactive_line(text) + if renderer: + with renderer.pause_spinner(): + renderer.console.print(f" [dim]↳ {text}[/dim]") + else: + with thinking.pause() if thinking else nullcontext(): + await _print_interactive_line(text) async def _maybe_print_interactive_progress( msg: Any, - renderer: StreamRenderer | None, + thinking: ThinkingSpinner | None, channels_config: Any, + renderer: StreamRenderer | None = None, ) -> bool: metadata = msg.metadata or {} if metadata.get("_retry_wait"): - await _print_interactive_progress_line(msg.content, renderer) + await _print_interactive_progress_line(msg.content, thinking, renderer) return True if not metadata.get("_progress"): @@ -262,7 +269,7 @@ async def _maybe_print_interactive_progress( if channels_config and not is_tool_hint and not channels_config.send_progress: return True - await _print_interactive_progress_line(msg.content, renderer) + await _print_interactive_progress_line(msg.content, thinking, renderer) return True @@ -1121,13 +1128,15 @@ def agent( # Shared reference for progress callbacks _thinking: ThinkingSpinner | None = None - async def _cli_progress(content: str, *, tool_hint: bool = False, **_kwargs: Any) -> None: - ch = agent_loop.channels_config - if ch and tool_hint and not ch.send_tool_hints: - return - if ch and not tool_hint and not ch.send_progress: - return - _print_cli_progress_line(content, _thinking) + def _make_progress(renderer: StreamRenderer | None = None): + async def _cli_progress(content: str, *, tool_hint: bool = False, **_kwargs: Any) -> None: + ch = agent_loop.channels_config + if ch and tool_hint and not ch.send_tool_hints: + return + if ch and not tool_hint and not ch.send_progress: + return + _print_cli_progress_line(content, _thinking, renderer) + return _cli_progress if message: # Single message mode — direct call, no bus needed @@ -1135,7 +1144,7 @@ def 
agent( renderer = StreamRenderer(render_markdown=markdown) response = await agent_loop.process_direct( message, session_id, - on_progress=_cli_progress, + on_progress=_make_progress(renderer), on_stream=renderer.on_delta, on_stream_end=renderer.on_end, ) @@ -1206,6 +1215,7 @@ def agent( msg, renderer, agent_loop.channels_config, + renderer, ): continue diff --git a/nanobot/cli/stream.py b/nanobot/cli/stream.py index b0095f153..807c88fef 100644 --- a/nanobot/cli/stream.py +++ b/nanobot/cli/stream.py @@ -1,13 +1,15 @@ """Streaming renderer for CLI output. -Uses Rich Live with auto_refresh=False for stable, flicker-free -markdown rendering during streaming. Ellipsis mode handles overflow. +Uses Rich Live with ``transient=True`` for in-place markdown updates during +streaming. After the live display stops, a final clean render is printed +so the content persists on screen. ``transient=True`` ensures the live +area is erased before ``stop()`` returns, avoiding the duplication bug +that plagued earlier approaches. """ from __future__ import annotations import sys -import time from rich.console import Console from rich.live import Live @@ -67,27 +69,38 @@ class ThinkingSpinner: class StreamRenderer: - """Rich Live streaming with markdown. auto_refresh=False avoids render races. + """Streaming renderer with Rich Live for in-place updates. - Deltas arrive pre-filtered (no tags) from the agent loop. + During streaming: updates content in-place via Rich Live. + On end: stops Live (transient=True erases it), then prints final render. 
Flow per round: - spinner -> first visible delta -> header + Live renders -> - on_end -> Live stops (content stays on screen) + spinner -> first delta -> header + Live updates -> + on_end -> stop Live + final render """ def __init__(self, render_markdown: bool = True, show_spinner: bool = True): self._md = render_markdown self._show_spinner = show_spinner self._buf = "" - self._live: Live | None = None - self._t = 0.0 self.streamed = False + self._header_printed = False + self._console = _make_console() + self._live: Live | None = None self._spinner: ThinkingSpinner | None = None self._start_spinner() - def _render(self): - return Markdown(self._buf) if self._md and self._buf else Text(self._buf or "") + def _renderable(self): + """Create a renderable from the current buffer.""" + if self._md and self._buf: + return Markdown(self._buf) + return Text(self._buf or "") + + def _render_str(self) -> str: + """Render current buffer to a plain string via Rich.""" + with self._console.capture() as cap: + self._console.print(self._renderable()) + return cap.get() def _start_spinner(self) -> None: if self._show_spinner: @@ -99,36 +112,55 @@ class StreamRenderer: self._spinner.__exit__(None, None, None) self._spinner = None + @property + def console(self) -> Console: + """Expose the Live's console so external print functions can use it.""" + return self._console + + def pause_spinner(self): + """Context manager: temporarily stop spinner for clean output.""" + if self._spinner: + return self._spinner.pause() + from contextlib import nullcontext + return nullcontext() + async def on_delta(self, delta: str) -> None: self.streamed = True self._buf += delta - if self._live is None: - if not self._buf.strip(): - return - self._stop_spinner() - c = _make_console() - c.print() - c.print(f"[cyan]{__logo__} nanobot[/cyan]") - self._live = Live(self._render(), console=c, auto_refresh=False) + if not self._header_printed and self._buf.strip(): + self._console.print() + 
self._console.print(f"[cyan]{__logo__} nanobot[/cyan]") + self._header_printed = True + self._stop_spinner() + if not self._live: + self._live = Live( + self._renderable(), + console=self._console, + auto_refresh=False, + transient=True, + ) self._live.start() - now = time.monotonic() - if (now - self._t) > 0.15: - self._live.update(self._render()) - self._live.refresh() - self._t = now + else: + self._live.update(self._renderable()) + self._live.refresh() async def on_end(self, *, resuming: bool = False) -> None: if self._live: - self._live.update(self._render()) + # Double-refresh to sync _shape before stop() calls refresh(). + self._live.refresh() + self._live.update(self._renderable()) self._live.refresh() self._live.stop() self._live = None self._stop_spinner() + if self._header_printed and self._buf.strip(): + # Print final rendered content (persists after Live is gone). + out = sys.stdout + out.write(self._render_str()) + out.flush() if resuming: self._buf = "" self._start_spinner() - else: - _make_console().print() def stop_for_input(self) -> None: """Stop spinner before user input to avoid prompt_toolkit conflicts.""" diff --git a/tests/cli/test_interactive_retry_wait.py b/tests/cli/test_interactive_retry_wait.py index 5cc217c56..e58102dcd 100644 --- a/tests/cli/test_interactive_retry_wait.py +++ b/tests/cli/test_interactive_retry_wait.py @@ -17,7 +17,7 @@ async def test_interactive_retry_wait_is_rendered_as_progress_even_when_progress metadata={"_retry_wait": True}, ) - async def fake_print(text: str, active_thinking: object | None) -> None: + async def fake_print(text: str, active_thinking: object | None, renderer=None) -> None: calls.append((text, active_thinking)) with patch("nanobot.cli.commands._print_interactive_progress_line", side_effect=fake_print): From 3a27af0018b106f4b9212289c75da03d3e67da62 Mon Sep 17 00:00:00 2001 From: Flinn Xie Date: Wed, 6 May 2026 01:35:53 +0800 Subject: [PATCH 02/17] feat(cli): display model reasoning content during 
streaming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add show_reasoning config (default: False) to display model thinking/reasoning content in the TUI during streaming. Reasoning is emitted via a new emit_reasoning hook on AgentHook, gated by the channels config. Display uses ✻ prefix with dim italic styling. Co-Authored-By: Claude Opus 4.7 --- nanobot/agent/hook.py | 6 +++ nanobot/agent/loop.py | 11 +++++ nanobot/agent/runner.py | 3 ++ nanobot/cli/commands.py | 27 +++++++++-- nanobot/cli/stream.py | 11 ++--- nanobot/config/schema.py | 1 + tests/agent/test_hook_composite.py | 23 ++++++++- tests/cli/test_cli_input.py | 54 ++++++++++++++++++++-- tests/cli/test_interactive_retry_wait.py | 59 ++++++++++++++++++++++++ 9 files changed, 182 insertions(+), 13 deletions(-) diff --git a/nanobot/agent/hook.py b/nanobot/agent/hook.py index d0106cfb6..5e4ea4d4d 100644 --- a/nanobot/agent/hook.py +++ b/nanobot/agent/hook.py @@ -48,6 +48,9 @@ class AgentHook: async def before_execute_tools(self, context: AgentHookContext) -> None: pass + async def emit_reasoning(self, reasoning_content: str | None) -> None: + pass + async def after_iteration(self, context: AgentHookContext) -> None: pass @@ -95,6 +98,9 @@ class CompositeHook(AgentHook): async def before_execute_tools(self, context: AgentHookContext) -> None: await self._for_each_hook_safe("before_execute_tools", context) + async def emit_reasoning(self, reasoning_content: str | None) -> None: + await self._for_each_hook_safe("emit_reasoning", reasoning_content) + async def after_iteration(self, context: AgentHookContext) -> None: await self._for_each_hook_safe("after_iteration", context) diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index 330c82357..e12bf53c9 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -155,6 +155,14 @@ class _LoopHook(AgentHook): session_key=self._session_key, ) + async def emit_reasoning(self, reasoning_content: str | None) -> 
None: + """Send reasoning/thinking content as progress before the main answer.""" + ch = self._loop.channels_config + if not ch or not ch.show_reasoning: + return + if self._on_progress and reasoning_content: + await self._on_progress(reasoning_content, reasoning=True) + async def after_iteration(self, context: AgentHookContext) -> None: if ( self._on_progress @@ -1114,10 +1122,13 @@ class AgentLoop: *, tool_hint: bool = False, tool_events: list[dict[str, Any]] | None = None, + reasoning: bool = False, ) -> None: meta = dict(msg.metadata or {}) meta["_progress"] = True meta["_tool_hint"] = tool_hint + if reasoning: + meta["_reasoning"] = True if tool_events: meta["_tool_events"] = tool_events await self.bus.publish_outbound( diff --git a/nanobot/agent/runner.py b/nanobot/agent/runner.py index 7fe92ad51..2ff2cf045 100644 --- a/nanobot/agent/runner.py +++ b/nanobot/agent/runner.py @@ -282,6 +282,9 @@ class AgentRunner: context.tool_calls = list(response.tool_calls) self._accumulate_usage(usage, raw_usage) + if response.reasoning_content: + await hook.emit_reasoning(response.reasoning_content) + if response.should_execute_tools: tool_calls = list(response.tool_calls) ask_index = next((i for i, tc in enumerate(tool_calls) if tc.name == "ask_user"), None) diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index 236d787ce..1c835962a 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -237,6 +237,16 @@ def _print_cli_progress_line(text: str, thinking: ThinkingSpinner | None, render target.print(f" [dim]↳ {text}[/dim]") +def _print_cli_reasoning(text: str, thinking: ThinkingSpinner | None, renderer: StreamRenderer | None = None) -> None: + """Print reasoning/thinking content in a distinct style.""" + if not text.strip(): + return + target = renderer.console if renderer else console + pause = renderer.pause_spinner() if renderer else (thinking.pause() if thinking else nullcontext()) + with pause: + target.print(f"[dim italic]✻ {text}[/dim 
italic]") + + async def _print_interactive_progress_line(text: str, thinking: ThinkingSpinner | None, renderer: StreamRenderer | None = None) -> None: """Print an interactive progress line, pausing the spinner if needed.""" if not text.strip(): @@ -264,12 +274,18 @@ async def _maybe_print_interactive_progress( return False is_tool_hint = metadata.get("_tool_hint", False) + is_reasoning = metadata.get("_reasoning", False) if channels_config and is_tool_hint and not channels_config.send_tool_hints: return True if channels_config and not is_tool_hint and not channels_config.send_progress: return True + if is_reasoning and channels_config and not channels_config.show_reasoning: + return True - await _print_interactive_progress_line(msg.content, thinking, renderer) + if is_reasoning: + _print_cli_reasoning(msg.content, thinking, renderer) + else: + await _print_interactive_progress_line(msg.content, thinking, renderer) return True @@ -1129,13 +1145,18 @@ def agent( _thinking: ThinkingSpinner | None = None def _make_progress(renderer: StreamRenderer | None = None): - async def _cli_progress(content: str, *, tool_hint: bool = False, **_kwargs: Any) -> None: + async def _cli_progress(content: str, *, tool_hint: bool = False, reasoning: bool = False, **_kwargs: Any) -> None: ch = agent_loop.channels_config if ch and tool_hint and not ch.send_tool_hints: return if ch and not tool_hint and not ch.send_progress: return - _print_cli_progress_line(content, _thinking, renderer) + if reasoning and ch and not ch.show_reasoning: + return + if reasoning: + _print_cli_reasoning(content, _thinking, renderer) + else: + _print_cli_progress_line(content, _thinking, renderer) return _cli_progress if message: diff --git a/nanobot/cli/stream.py b/nanobot/cli/stream.py index 807c88fef..ec7f0a96c 100644 --- a/nanobot/cli/stream.py +++ b/nanobot/cli/stream.py @@ -84,7 +84,6 @@ class StreamRenderer: self._show_spinner = show_spinner self._buf = "" self.streamed = False - self._header_printed = 
False self._console = _make_console() self._live: Live | None = None self._spinner: ThinkingSpinner | None = None @@ -127,12 +126,12 @@ class StreamRenderer: async def on_delta(self, delta: str) -> None: self.streamed = True self._buf += delta - if not self._header_printed and self._buf.strip(): + if self._live is None: + if not self._buf.strip(): + return + self._stop_spinner() self._console.print() self._console.print(f"[cyan]{__logo__} nanobot[/cyan]") - self._header_printed = True - self._stop_spinner() - if not self._live: self._live = Live( self._renderable(), console=self._console, @@ -153,7 +152,7 @@ class StreamRenderer: self._live.stop() self._live = None self._stop_spinner() - if self._header_printed and self._buf.strip(): + if self._buf.strip(): # Print final rendered content (persists after Live is gone). out = sys.stdout out.write(self._render_str()) diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index 47f2babcd..66a7a75aa 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -27,6 +27,7 @@ class ChannelsConfig(Base): send_progress: bool = True # stream agent's text progress to the channel send_tool_hints: bool = False # stream tool-call hints (e.g. 
read_file("…")) + show_reasoning: bool = False # show model reasoning/thinking content send_max_retries: int = Field(default=3, ge=0, le=10) # Max delivery attempts (initial send included) transcription_provider: str = "groq" # Voice transcription backend: "groq" or "openai" transcription_language: str | None = Field(default=None, pattern=r"^[a-z]{2,3}$") # Optional ISO-639-1 hint for audio transcription diff --git a/tests/agent/test_hook_composite.py b/tests/agent/test_hook_composite.py index 8971d48ec..9b6c2820d 100644 --- a/tests/agent/test_hook_composite.py +++ b/tests/agent/test_hook_composite.py @@ -13,6 +13,17 @@ def _ctx() -> AgentHookContext: return AgentHookContext(iteration=0, messages=[]) +# --------------------------------------------------------------------------- +# Base AgentHook emit_reasoning: no-op +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_base_hook_emit_reasoning_is_noop(): + hook = AgentHook() + await hook.emit_reasoning("should not raise") + + # --------------------------------------------------------------------------- # Fan-out: every hook is called in order # --------------------------------------------------------------------------- @@ -45,6 +56,9 @@ async def test_composite_fans_out_all_async_methods(): async def before_iteration(self, context: AgentHookContext) -> None: events.append("before_iteration") + async def emit_reasoning(self, reasoning_content: str | None) -> None: + events.append(f"emit_reasoning:{reasoning_content}") + async def on_stream(self, context: AgentHookContext, delta: str) -> None: events.append(f"on_stream:{delta}") @@ -61,6 +75,7 @@ async def test_composite_fans_out_all_async_methods(): ctx = _ctx() await hook.before_iteration(ctx) + await hook.emit_reasoning("thinking...") await hook.on_stream(ctx, "hi") await hook.on_stream_end(ctx, resuming=True) await hook.before_execute_tools(ctx) @@ -68,6 +83,7 @@ async def 
test_composite_fans_out_all_async_methods(): assert events == [ "before_iteration", "before_iteration", + "emit_reasoning:thinking...", "emit_reasoning:thinking...", "on_stream:hi", "on_stream:hi", "on_stream_end:True", "on_stream_end:True", "before_execute_tools", "before_execute_tools", @@ -120,6 +136,8 @@ async def test_composite_error_isolation_all_async(): calls: list[str] = [] class Bad(AgentHook): + async def emit_reasoning(self, reasoning_content): + raise RuntimeError("err") async def on_stream_end(self, context, *, resuming): raise RuntimeError("err") async def before_execute_tools(self, context): @@ -128,6 +146,8 @@ async def test_composite_error_isolation_all_async(): raise RuntimeError("err") class Good(AgentHook): + async def emit_reasoning(self, reasoning_content): + calls.append("emit_reasoning") async def on_stream_end(self, context, *, resuming): calls.append("on_stream_end") async def before_execute_tools(self, context): @@ -137,10 +157,11 @@ async def test_composite_error_isolation_all_async(): hook = CompositeHook([Bad(), Good()]) ctx = _ctx() + await hook.emit_reasoning("test") await hook.on_stream_end(ctx, resuming=False) await hook.before_execute_tools(ctx) await hook.after_iteration(ctx) - assert calls == ["on_stream_end", "before_execute_tools", "after_iteration"] + assert calls == ["emit_reasoning", "on_stream_end", "before_execute_tools", "after_iteration"] # --------------------------------------------------------------------------- diff --git a/tests/cli/test_cli_input.py b/tests/cli/test_cli_input.py index e648e818c..69293f4b8 100644 --- a/tests/cli/test_cli_input.py +++ b/tests/cli/test_cli_input.py @@ -156,17 +156,65 @@ def test_stream_renderer_stop_for_input_stops_spinner(): # Create renderer with mocked console with patch.object(stream_mod, "_make_console", return_value=mock_console): renderer = stream_mod.StreamRenderer(show_spinner=True) - + # Verify spinner started spinner.start.assert_called_once() - + # Stop for input 
renderer.stop_for_input() - + # Verify spinner stopped spinner.stop.assert_called_once() +@pytest.mark.asyncio +async def test_on_end_writes_final_content_to_stdout_after_stopping_live(): + """on_end should stop Live (transient erases it) then print final content to stdout.""" + mock_live = MagicMock() + mock_console = MagicMock() + mock_console.capture.return_value.__enter__ = MagicMock( + return_value=MagicMock(get=lambda: "final output\n") + ) + mock_console.capture.return_value.__exit__ = MagicMock(return_value=False) + + with patch.object(stream_mod, "_make_console", return_value=mock_console): + renderer = stream_mod.StreamRenderer(show_spinner=False) + renderer._live = mock_live + renderer._buf = "final output" + + written: list[str] = [] + with patch("sys.stdout") as mock_stdout: + mock_stdout.write = lambda s: written.append(s) + mock_stdout.flush = MagicMock() + await renderer.on_end() + + mock_live.stop.assert_called_once() + assert renderer._live is None + assert written == ["final output\n"] + + +@pytest.mark.asyncio +async def test_on_end_resuming_clears_buffer_and_restarts_spinner(): + """on_end(resuming=True) should reset state for the next iteration.""" + spinner = MagicMock() + mock_console = MagicMock() + mock_console.status.return_value = spinner + mock_console.capture.return_value.__enter__ = MagicMock( + return_value=MagicMock(get=lambda: "") + ) + mock_console.capture.return_value.__exit__ = MagicMock(return_value=False) + + with patch.object(stream_mod, "_make_console", return_value=mock_console): + renderer = stream_mod.StreamRenderer(show_spinner=True) + renderer._buf = "some content" + + await renderer.on_end(resuming=True) + + assert renderer._buf == "" + # Spinner should have been restarted (start called twice: __init__ + resuming) + assert spinner.start.call_count == 2 + + def test_make_console_force_terminal_when_stdout_is_tty(): """Console should set force_terminal=True when stdout is a TTY (rich output).""" import sys diff --git 
a/tests/cli/test_interactive_retry_wait.py b/tests/cli/test_interactive_retry_wait.py index e58102dcd..e693b057c 100644 --- a/tests/cli/test_interactive_retry_wait.py +++ b/tests/cli/test_interactive_retry_wait.py @@ -29,3 +29,62 @@ async def test_interactive_retry_wait_is_rendered_as_progress_even_when_progress assert handled is True assert calls == [("Model request failed, retry in 2s (attempt 1).", thinking)] + + +@pytest.mark.asyncio +async def test_reasoning_displayed_when_show_reasoning_enabled(): + """Reasoning content should be displayed when show_reasoning is True.""" + calls: list[str] = [] + channels_config = SimpleNamespace( + send_progress=True, send_tool_hints=False, show_reasoning=True, + ) + msg = SimpleNamespace( + content="Let me think about this...", + metadata={"_progress": True, "_reasoning": True}, + ) + + with patch("nanobot.cli.commands._print_cli_reasoning", side_effect=lambda t, th, r=None: calls.append(t)): + handled = await commands._maybe_print_interactive_progress(msg, None, channels_config) + + assert handled is True + assert calls == ["Let me think about this..."] + + +@pytest.mark.asyncio +async def test_reasoning_hidden_when_show_reasoning_disabled(): + """Reasoning content should be suppressed when show_reasoning is False.""" + channels_config = SimpleNamespace( + send_progress=True, send_tool_hints=False, show_reasoning=False, + ) + msg = SimpleNamespace( + content="Let me think about this...", + metadata={"_progress": True, "_reasoning": True}, + ) + + with patch("nanobot.cli.commands._print_cli_reasoning") as mock_reasoning: + handled = await commands._maybe_print_interactive_progress(msg, None, channels_config) + + assert handled is True + mock_reasoning.assert_not_called() + + +@pytest.mark.asyncio +async def test_non_reasoning_progress_not_affected_by_show_reasoning(): + """Regular progress lines should display regardless of show_reasoning.""" + calls: list[str] = [] + channels_config = SimpleNamespace( + send_progress=True, 
send_tool_hints=False, show_reasoning=False, + ) + msg = SimpleNamespace( + content="working on it...", + metadata={"_progress": True}, + ) + + async def fake_print(text: str, thinking=None, renderer=None): + calls.append(text) + + with patch("nanobot.cli.commands._print_interactive_progress_line", side_effect=fake_print): + handled = await commands._maybe_print_interactive_progress(msg, None, channels_config) + + assert handled is True + assert calls == ["working on it..."] From 3a851f8f8de09dd0c57b295958b0b7c67d362d0a Mon Sep 17 00:00:00 2001 From: Flinn Xie Date: Tue, 12 May 2026 23:02:59 +0800 Subject: [PATCH 03/17] feat(reasoning): add inline think tag extraction and Anthropic thinking_blocks support Add extract_think() and emit_incremental_think() helpers to extract thinking content from inline <think> and <thought> tags in the content field. This handles models served via Ollama, self-hosted vLLM, or other compatible endpoints that embed reasoning as inline tags instead of using the dedicated reasoning_content API field. Also adds Anthropic thinking_blocks support for extended thinking via the thinking content blocks array. 
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus --- nanobot/agent/loop.py | 9 ++- nanobot/agent/runner.py | 28 ++++++- nanobot/utils/helpers.py | 41 +++++++++++ tests/agent/test_runner.py | 126 ++++++++++++++++++++++++++++++++ tests/utils/test_strip_think.py | 83 ++++++++++++++++++++- 5 files changed, 283 insertions(+), 4 deletions(-) diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index e12bf53c9..9d2899b04 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -101,17 +101,23 @@ class _LoopHook(AgentHook): self._metadata = metadata or {} self._session_key = session_key self._stream_buf = "" + self._emitted_thinking = "" def wants_streaming(self) -> bool: return self._on_stream is not None async def on_stream(self, context: AgentHookContext, delta: str) -> None: - from nanobot.utils.helpers import strip_think + from nanobot.utils.helpers import emit_incremental_think, strip_think prev_clean = strip_think(self._stream_buf) self._stream_buf += delta new_clean = strip_think(self._stream_buf) incremental = new_clean[len(prev_clean) :] + + self._emitted_thinking = await emit_incremental_think( + self._stream_buf, self._emitted_thinking, self.emit_reasoning, + ) + if incremental and self._on_stream: await self._on_stream(incremental) @@ -119,6 +125,7 @@ class _LoopHook(AgentHook): if self._on_stream_end: await self._on_stream_end(resuming=resuming) self._stream_buf = "" + self._emitted_thinking = "" async def before_iteration(self, context: AgentHookContext) -> None: self._loop._current_iteration = context.iteration diff --git a/nanobot/agent/runner.py b/nanobot/agent/runner.py index 2ff2cf045..9a1cc6d65 100644 --- a/nanobot/agent/runner.py +++ b/nanobot/agent/runner.py @@ -18,8 +18,10 @@ from nanobot.agent.tools.registry import ToolRegistry from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest from nanobot.utils.helpers import ( build_assistant_message, + 
emit_incremental_think, estimate_message_tokens, estimate_prompt_tokens_chain, + extract_think, find_legal_message_start, maybe_persist_tool_result, strip_think, @@ -283,7 +285,23 @@ class AgentRunner: self._accumulate_usage(usage, raw_usage) if response.reasoning_content: - await hook.emit_reasoning(response.reasoning_content) + if not context.streamed_content: + await hook.emit_reasoning(response.reasoning_content) + if response.content: + response.content = strip_think(response.content) + elif response.thinking_blocks: + # Anthropic extended thinking: extract from thinking_blocks. + if not context.streamed_content: + parts = [tb.get("thinking", "") for tb in response.thinking_blocks if tb.get("type") == "thinking"] + if parts: + await hook.emit_reasoning("\n\n".join(parts)) + elif response.content: + inline_thinking, clean_content = extract_think(response.content) + if inline_thinking: + # Only emit if streaming didn't already handle it. + if not context.streamed_content: + await hook.emit_reasoning(inline_thinking) + response.content = clean_content if response.should_execute_tools: tool_calls = list(response.tool_calls) @@ -636,15 +654,21 @@ class AgentRunner: ) elif wants_progress_streaming: stream_buf = "" + emitted_thinking = "" async def _stream_progress(delta: str) -> None: - nonlocal stream_buf + nonlocal stream_buf, emitted_thinking if not delta: return prev_clean = strip_think(stream_buf) stream_buf += delta new_clean = strip_think(stream_buf) incremental = new_clean[len(prev_clean):] + + emitted_thinking = await emit_incremental_think( + stream_buf, emitted_thinking, hook.emit_reasoning, + ) + if incremental: context.streamed_content = True await spec.progress_callback(incremental) diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py index b047e24d2..5301f4885 100644 --- a/nanobot/utils/helpers.py +++ b/nanobot/utils/helpers.py @@ -71,6 +71,47 @@ def strip_think(text: str) -> str: return text.strip() +def extract_think(text: str) -> 
tuple[str | None, str]: + """Extract thinking/reasoning content from and tags. + + Returns (thinking_text, cleaned_text) where: + - thinking_text: concatenated content from all ... and + ... blocks, or None if none found. + - cleaned_text: the input with all thinking blocks removed (same as + strip_think()). + + Only extracts from well-formed closed blocks. Unclosed trailing tags + (common during streaming) are stripped without extraction — use + strip_think() for pure streaming cleanup. + """ + parts: list[str] = [] + for m in re.finditer(r"([\s\S]*?)", text): + parts.append(m.group(1).strip()) + for m in re.finditer(r"([\s\S]*?)", text): + parts.append(m.group(1).strip()) + thinking = "\n\n".join(parts) if parts else None + return thinking, strip_think(text) + + +async def emit_incremental_think( + buf: str, + emitted: str, + emit_fn: Any, +) -> str: + """Extract new thinking from buf and emit if not yet emitted. + + Returns the updated emitted state. *emit_fn* is an async callable + that accepts a single reasoning string (e.g. ``hook.emit_reasoning``). 
+ """ + thinking, _ = extract_think(buf) + if thinking and thinking != emitted: + new = thinking[len(emitted):] + if new.strip(): + await emit_fn(new.strip()) + return thinking + return emitted + + def detect_image_mime(data: bytes) -> str | None: """Detect image MIME type from magic bytes, ignoring file extension.""" if data[:8] == b"\x89PNG\r\n\x1a\n": diff --git a/tests/agent/test_runner.py b/tests/agent/test_runner.py index b821d9bab..850e3caea 100644 --- a/tests/agent/test_runner.py +++ b/tests/agent/test_runner.py @@ -101,6 +101,132 @@ async def test_runner_preserves_reasoning_fields_and_tool_results(): ) +@pytest.mark.asyncio +async def test_runner_emits_anthropic_thinking_blocks(): + from nanobot.agent.hook import AgentHook, AgentHookContext + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock() + emitted_reasoning: list[str] = [] + + async def chat_with_retry(**kwargs): + return LLMResponse( + content="The answer is 42.", + thinking_blocks=[ + {"type": "thinking", "thinking": "Let me analyze this step by step.", "signature": "sig1"}, + {"type": "thinking", "thinking": "After careful consideration.", "signature": "sig2"}, + ], + tool_calls=[], + usage={"prompt_tokens": 5, "completion_tokens": 3}, + ) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + class ReasoningHook(AgentHook): + async def emit_reasoning(self, reasoning_content: str | None) -> None: + if reasoning_content: + emitted_reasoning.append(reasoning_content) + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "question"}], + tools=tools, + model="test-model", + max_iterations=3, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + hook=ReasoningHook(), + )) + + assert result.final_content == "The answer is 42." 
+ assert len(emitted_reasoning) == 1 + assert "Let me analyze this" in emitted_reasoning[0] + assert "After careful consideration" in emitted_reasoning[0] + + +@pytest.mark.asyncio +async def test_runner_emits_inline_think_content_as_reasoning(): + """Models returning ... in content should have thinking extracted and emitted.""" + from nanobot.agent.hook import AgentHook, AgentHookContext + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock() + emitted_reasoning: list[str] = [] + + async def chat_with_retry(**kwargs): + return LLMResponse( + content="Let me think about this...\nThe answer is 42.The answer is 42.", + tool_calls=[], + usage={"prompt_tokens": 5, "completion_tokens": 3}, + ) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + class ReasoningHook(AgentHook): + async def emit_reasoning(self, reasoning_content: str | None) -> None: + if reasoning_content: + emitted_reasoning.append(reasoning_content) + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "what is the answer?"}], + tools=tools, + model="test-model", + max_iterations=3, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + hook=ReasoningHook(), + )) + + assert result.final_content == "The answer is 42." 
+ assert len(emitted_reasoning) == 1 + assert "Let me think about this" in emitted_reasoning[0] + assert "The answer is 42" in emitted_reasoning[0] + + +@pytest.mark.asyncio +async def test_runner_prefers_reasoning_content_over_inline_think(): + from nanobot.agent.hook import AgentHook, AgentHookContext + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock() + emitted_reasoning: list[str] = [] + + async def chat_with_retry(**kwargs): + return LLMResponse( + content="inline thinkingThe answer.", + reasoning_content="dedicated reasoning field", + tool_calls=[], + usage={"prompt_tokens": 5, "completion_tokens": 3}, + ) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + class ReasoningHook(AgentHook): + async def emit_reasoning(self, reasoning_content: str | None) -> None: + if reasoning_content: + emitted_reasoning.append(reasoning_content) + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "question"}], + tools=tools, + model="test-model", + max_iterations=3, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + hook=ReasoningHook(), + )) + + assert result.final_content == "The answer." 
+ # Only the dedicated field should be emitted, not the inline content + assert len(emitted_reasoning) == 1 + assert emitted_reasoning[0] == "dedicated reasoning field" + + @pytest.mark.asyncio async def test_runner_calls_hooks_in_order(): from nanobot.agent.hook import AgentHook, AgentHookContext diff --git a/tests/utils/test_strip_think.py b/tests/utils/test_strip_think.py index 5db93e658..65d952ad1 100644 --- a/tests/utils/test_strip_think.py +++ b/tests/utils/test_strip_think.py @@ -1,4 +1,4 @@ -from nanobot.utils.helpers import strip_think +from nanobot.utils.helpers import extract_think, strip_think class TestStripThinkTag: @@ -144,3 +144,84 @@ class TestStripThinkConservativePreserve: def test_literal_channel_marker_in_code_block_preserved(self): text = "Example:\n```\nif line.startswith(''):\n skip()\n```" assert strip_think(text) == text + + +class TestExtractThink: + + def test_no_think_tags(self): + thinking, clean = extract_think("Hello World") + assert thinking is None + assert clean == "Hello World" + + def test_single_think_block(self): + text = "Hello reasoning content\nhere World" + thinking, clean = extract_think(text) + assert thinking == "reasoning content\nhere" + assert clean == "Hello World" + + def test_single_thought_block(self): + text = "Hello reasoning content World" + thinking, clean = extract_think(text) + assert thinking == "reasoning content" + assert clean == "Hello World" + + def test_multiple_think_blocks(self): + text = "AfirstBsecondC" + thinking, clean = extract_think(text) + assert thinking == "first\n\nsecond" + assert clean == "ABC" + + def test_think_only_no_content(self): + text = "just thinking" + thinking, clean = extract_think(text) + assert thinking == "just thinking" + assert clean == "" + + def test_unclosed_think_not_extracted(self): + # Unclosed blocks at start are stripped but NOT extracted + text = "unclosed thinking..." 
+ thinking, clean = extract_think(text) + assert thinking is None + assert clean == "" + + def test_empty_think_block(self): + text = "Hello World" + thinking, clean = extract_think(text) + # Empty blocks result in empty string after strip + assert thinking == "" + assert clean == "Hello World" + + def test_think_with_whitespace_only(self): + text = "Hello \n World" + thinking, clean = extract_think(text) + assert thinking is None + assert clean == "Hello \n World" + + def test_mixed_think_and_thought(self): + text = "Startfirst reasoningmiddlesecond reasoningEnd" + thinking, clean = extract_think(text) + assert thinking == "first reasoning\n\nsecond reasoning" + assert clean == "StartmiddleEnd" + + def test_real_world_ollama_response(self): + text = """ +The user is asking about Python list comprehensions. +Let me explain the syntax and give examples. + + +List comprehensions in Python provide a concise way to create lists. Here's the syntax: + +```python +[expression for item in iterable if condition] +``` + +For example: +```python +squares = [x**2 for x in range(10)] +```""" + thinking, clean = extract_think(text) + assert "list comprehensions" in thinking.lower() + assert "Let me explain" in thinking + assert "List comprehensions in Python" in clean + assert "" not in clean + assert "" not in clean From 352aaf0627385126929af011f08273c2e4f8b9aa Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Tue, 12 May 2026 17:13:42 +0000 Subject: [PATCH 04/17] refactor(reasoning): unify reasoning extraction across providers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reasoning surfacing was split across three branches in runner.py plus two separate streaming buffers (loop hook and runner progress stream), with three independent display-side gates in the CLI. 
This collapsed the policy into one source of truth and fixed two real bugs: - Structured `reasoning_content` was suppressed whenever the answer was streamed, because the runner gated emission on `streamed_content`. Providers don't stream `reasoning_content`; it only arrives on the final response, so the answer stream and the reasoning channel are independent. Added `streamed_reasoning` to `AgentHookContext` to track the right bit. - `channels.showReasoning` was subordinated to `sendProgress`. They are orthogonal — turning off progress streaming shouldn't silence reasoning. Reworked the CLI gates accordingly. Single-helper consolidation: - `extract_reasoning(reasoning_content, thinking_blocks, content)` returns `(reasoning_text, cleaned_content)` with a defined fallback order: dedicated field → Anthropic thinking_blocks → inline ``/`` tags. Models that expose none of these short-circuit to `(None, content)` — zero overhead. - `IncrementalThinkExtractor` replaces the ad-hoc `emit_incremental_think` function and its hand-rolled "emitted cursor" state in both the loop hook and the runner progress stream. Also documented the new `showReasoning` channel option in docs/configuration.md and noted its independence from sendProgress. Co-authored-by: Cursor --- docs/configuration.md | 1 + nanobot/agent/hook.py | 1 + nanobot/agent/loop.py | 13 ++- nanobot/agent/runner.py | 40 ++++----- nanobot/cli/commands.py | 24 +++--- nanobot/utils/helpers.py | 96 +++++++++++++++------ tests/agent/test_runner.py | 105 +++++++++++++++++++++++ tests/cli/test_interactive_retry_wait.py | 23 +++++ tests/utils/test_strip_think.py | 48 ++++++++++- 9 files changed, 281 insertions(+), 70 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 01d55c20b..01ef46814 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -677,6 +677,7 @@ Global settings that apply to all channels. 
Configure under the `channels` secti |---------|---------|-------------| | `sendProgress` | `true` | Stream agent's text progress to the channel | | `sendToolHints` | `false` | Stream tool-call hints (e.g. `read_file("…")`) | +| `showReasoning` | `false` | Surface model reasoning/thinking content (DeepSeek-R1 `reasoning_content`, Anthropic `thinking_blocks`, inline `` tags). Independent of `sendProgress`. | | `sendMaxRetries` | `3` | Max delivery attempts per outbound message, including the initial send (0-10 configured, minimum 1 actual attempt) | | `transcriptionProvider` | `"groq"` | Voice transcription backend: `"groq"` (free tier, default) or `"openai"`. API key is auto-resolved from the matching provider config. | | `transcriptionLanguage` | `null` | Optional ISO-639-1 language hint for audio transcription, e.g. `"en"`, `"ko"`, `"ja"`. | diff --git a/nanobot/agent/hook.py b/nanobot/agent/hook.py index 5e4ea4d4d..86775742d 100644 --- a/nanobot/agent/hook.py +++ b/nanobot/agent/hook.py @@ -22,6 +22,7 @@ class AgentHookContext: tool_results: list[Any] = field(default_factory=list) tool_events: list[dict[str, str]] = field(default_factory=list) streamed_content: bool = False + streamed_reasoning: bool = False final_content: str | None = None stop_reason: str | None = None error: str | None = None diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index 9d2899b04..028d9ddd9 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -48,7 +48,7 @@ from nanobot.providers.factory import ProviderSnapshot from nanobot.session.manager import Session, SessionManager from nanobot.utils.artifacts import generated_image_paths_from_messages from nanobot.utils.document import extract_documents -from nanobot.utils.helpers import image_placeholder_text +from nanobot.utils.helpers import IncrementalThinkExtractor, image_placeholder_text from nanobot.utils.helpers import truncate_text as truncate_text_fn from nanobot.utils.image_generation_intent import 
image_generation_prompt from nanobot.utils.progress_events import ( @@ -101,22 +101,21 @@ class _LoopHook(AgentHook): self._metadata = metadata or {} self._session_key = session_key self._stream_buf = "" - self._emitted_thinking = "" + self._think_extractor = IncrementalThinkExtractor() def wants_streaming(self) -> bool: return self._on_stream is not None async def on_stream(self, context: AgentHookContext, delta: str) -> None: - from nanobot.utils.helpers import emit_incremental_think, strip_think + from nanobot.utils.helpers import strip_think prev_clean = strip_think(self._stream_buf) self._stream_buf += delta new_clean = strip_think(self._stream_buf) incremental = new_clean[len(prev_clean) :] - self._emitted_thinking = await emit_incremental_think( - self._stream_buf, self._emitted_thinking, self.emit_reasoning, - ) + if await self._think_extractor.feed(self._stream_buf, self.emit_reasoning): + context.streamed_reasoning = True if incremental and self._on_stream: await self._on_stream(incremental) @@ -125,7 +124,7 @@ class _LoopHook(AgentHook): if self._on_stream_end: await self._on_stream_end(resuming=resuming) self._stream_buf = "" - self._emitted_thinking = "" + self._think_extractor.reset() async def before_iteration(self, context: AgentHookContext) -> None: self._loop._current_iteration = context.iteration diff --git a/nanobot/agent/runner.py b/nanobot/agent/runner.py index 9a1cc6d65..2713359be 100644 --- a/nanobot/agent/runner.py +++ b/nanobot/agent/runner.py @@ -17,11 +17,11 @@ from nanobot.agent.tools.ask import AskUserInterrupt from nanobot.agent.tools.registry import ToolRegistry from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest from nanobot.utils.helpers import ( + IncrementalThinkExtractor, build_assistant_message, - emit_incremental_think, estimate_message_tokens, estimate_prompt_tokens_chain, - extract_think, + extract_reasoning, find_legal_message_start, maybe_persist_tool_result, strip_think, @@ -284,24 +284,15 @@ 
class AgentRunner: context.tool_calls = list(response.tool_calls) self._accumulate_usage(usage, raw_usage) - if response.reasoning_content: - if not context.streamed_content: - await hook.emit_reasoning(response.reasoning_content) - if response.content: - response.content = strip_think(response.content) - elif response.thinking_blocks: - # Anthropic extended thinking: extract from thinking_blocks. - if not context.streamed_content: - parts = [tb.get("thinking", "") for tb in response.thinking_blocks if tb.get("type") == "thinking"] - if parts: - await hook.emit_reasoning("\n\n".join(parts)) - elif response.content: - inline_thinking, clean_content = extract_think(response.content) - if inline_thinking: - # Only emit if streaming didn't already handle it. - if not context.streamed_content: - await hook.emit_reasoning(inline_thinking) - response.content = clean_content + reasoning_text, cleaned_content = extract_reasoning( + response.reasoning_content, + response.thinking_blocks, + response.content, + ) + response.content = cleaned_content + if reasoning_text and not context.streamed_reasoning: + await hook.emit_reasoning(reasoning_text) + context.streamed_reasoning = True if response.should_execute_tools: tool_calls = list(response.tool_calls) @@ -654,10 +645,10 @@ class AgentRunner: ) elif wants_progress_streaming: stream_buf = "" - emitted_thinking = "" + think_extractor = IncrementalThinkExtractor() async def _stream_progress(delta: str) -> None: - nonlocal stream_buf, emitted_thinking + nonlocal stream_buf if not delta: return prev_clean = strip_think(stream_buf) @@ -665,9 +656,8 @@ class AgentRunner: new_clean = strip_think(stream_buf) incremental = new_clean[len(prev_clean):] - emitted_thinking = await emit_incremental_think( - stream_buf, emitted_thinking, hook.emit_reasoning, - ) + if await think_extractor.feed(stream_buf, hook.emit_reasoning): + context.streamed_reasoning = True if incremental: context.streamed_content = True diff --git 
a/nanobot/cli/commands.py b/nanobot/cli/commands.py index 1c835962a..467683ed9 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -275,17 +275,17 @@ async def _maybe_print_interactive_progress( is_tool_hint = metadata.get("_tool_hint", False) is_reasoning = metadata.get("_reasoning", False) + if is_reasoning: + if channels_config and not channels_config.show_reasoning: + return True + _print_cli_reasoning(msg.content, thinking, renderer) + return True if channels_config and is_tool_hint and not channels_config.send_tool_hints: return True if channels_config and not is_tool_hint and not channels_config.send_progress: return True - if is_reasoning and channels_config and not channels_config.show_reasoning: - return True - if is_reasoning: - _print_cli_reasoning(msg.content, thinking, renderer) - else: - await _print_interactive_progress_line(msg.content, thinking, renderer) + await _print_interactive_progress_line(msg.content, thinking, renderer) return True @@ -1147,16 +1147,16 @@ def agent( def _make_progress(renderer: StreamRenderer | None = None): async def _cli_progress(content: str, *, tool_hint: bool = False, reasoning: bool = False, **_kwargs: Any) -> None: ch = agent_loop.channels_config + if reasoning: + if ch and not ch.show_reasoning: + return + _print_cli_reasoning(content, _thinking, renderer) + return if ch and tool_hint and not ch.send_tool_hints: return if ch and not tool_hint and not ch.send_progress: return - if reasoning and ch and not ch.show_reasoning: - return - if reasoning: - _print_cli_reasoning(content, _thinking, renderer) - else: - _print_cli_progress_line(content, _thinking, renderer) + _print_cli_progress_line(content, _thinking, renderer) return _cli_progress if message: diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py index 5301f4885..f348bc183 100644 --- a/nanobot/utils/helpers.py +++ b/nanobot/utils/helpers.py @@ -72,17 +72,11 @@ def strip_think(text: str) -> str: def extract_think(text: str) -> 
tuple[str | None, str]: - """Extract thinking/reasoning content from and tags. + """Extract thinking content from inline ```` / ```` blocks. - Returns (thinking_text, cleaned_text) where: - - thinking_text: concatenated content from all ... and - ... blocks, or None if none found. - - cleaned_text: the input with all thinking blocks removed (same as - strip_think()). - - Only extracts from well-formed closed blocks. Unclosed trailing tags - (common during streaming) are stripped without extraction — use - strip_think() for pure streaming cleanup. + Returns ``(thinking_text, cleaned_text)``. Only closed blocks are + extracted; unclosed streaming prefixes are stripped from the cleaned + text but not surfaced — :func:`strip_think` handles that case. """ parts: list[str] = [] for m in re.finditer(r"([\s\S]*?)", text): @@ -93,23 +87,75 @@ def extract_think(text: str) -> tuple[str | None, str]: return thinking, strip_think(text) -async def emit_incremental_think( - buf: str, - emitted: str, - emit_fn: Any, -) -> str: - """Extract new thinking from buf and emit if not yet emitted. +class IncrementalThinkExtractor: + """Stateful inline ```` extractor for streaming buffers. - Returns the updated emitted state. *emit_fn* is an async callable - that accepts a single reasoning string (e.g. ``hook.emit_reasoning``). + Streaming providers expose only a single content delta channel. When a + model embeds reasoning in ``...`` blocks inside that + channel, callers need to surface the reasoning incrementally as it + arrives without re-emitting earlier text. This holds the "already + emitted" cursor so the runner and the loop hook share one shape. 
""" - thinking, _ = extract_think(buf) - if thinking and thinking != emitted: - new = thinking[len(emitted):] - if new.strip(): - await emit_fn(new.strip()) - return thinking - return emitted + + __slots__ = ("_emitted",) + + def __init__(self) -> None: + self._emitted = "" + + def reset(self) -> None: + self._emitted = "" + + async def feed(self, buf: str, emit: Any) -> bool: + """Emit any new thinking text found in ``buf``. + + Returns True if anything was emitted this call. ``emit`` is an + async callable taking a single string (typically + ``hook.emit_reasoning``). + """ + thinking, _ = extract_think(buf) + if not thinking or thinking == self._emitted: + return False + new = thinking[len(self._emitted):].strip() + self._emitted = thinking + if not new: + return False + await emit(new) + return True + + +def extract_reasoning( + reasoning_content: str | None, + thinking_blocks: list[dict[str, Any]] | None, + content: str | None, +) -> tuple[str | None, str | None]: + """Return ``(reasoning_text, cleaned_content)`` from one model response. + + Single source of truth for "what reasoning did this response carry, and + what answer text remains after we peel it out". Fallback order: + + 1. Dedicated ``reasoning_content`` (DeepSeek-R1, Kimi, MiMo, OpenAI + reasoning models, Bedrock). + 2. Anthropic ``thinking_blocks``. + 3. Inline ```` / ```` blocks in ``content``. + + Only one source contributes per response; lower-priority sources are + ignored if a higher-priority one is present, but inline ```` + tags are still stripped from ``content`` so they never leak into the + final answer. 
+ """ + if reasoning_content: + return reasoning_content, strip_think(content) if content else content + if thinking_blocks: + parts = [ + tb.get("thinking", "") + for tb in thinking_blocks + if isinstance(tb, dict) and tb.get("type") == "thinking" + ] + joined = "\n\n".join(p for p in parts if p) + return (joined or None), strip_think(content) if content else content + if content: + return extract_think(content) + return None, content def detect_image_mime(data: bytes) -> str | None: diff --git a/tests/agent/test_runner.py b/tests/agent/test_runner.py index 850e3caea..d50b82cd4 100644 --- a/tests/agent/test_runner.py +++ b/tests/agent/test_runner.py @@ -227,6 +227,111 @@ async def test_runner_prefers_reasoning_content_over_inline_think(): assert emitted_reasoning[0] == "dedicated reasoning field" +@pytest.mark.asyncio +async def test_runner_emits_reasoning_content_even_when_answer_was_streamed(): + """`reasoning_content` arrives only on the final response; streaming the + answer must not suppress it (the answer stream and the reasoning channel + are independent — only the reasoning-already-emitted bit matters).""" + from nanobot.agent.hook import AgentHook, AgentHookContext + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock() + provider.supports_progress_deltas = True + emitted_reasoning: list[str] = [] + + async def chat_stream_with_retry(*, on_content_delta=None, **kwargs): + if on_content_delta: + await on_content_delta("The ") + await on_content_delta("answer.") + return LLMResponse( + content="The answer.", + reasoning_content="step-by-step deduction", + tool_calls=[], + usage={"prompt_tokens": 5, "completion_tokens": 3}, + ) + + provider.chat_stream_with_retry = chat_stream_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + class ReasoningHook(AgentHook): + async def emit_reasoning(self, reasoning_content: str | None) -> None: + if reasoning_content: + emitted_reasoning.append(reasoning_content) 
+ + progress_calls: list[str] = [] + + async def _progress(content: str, **_kwargs): + progress_calls.append(content) + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "question"}], + tools=tools, + model="test-model", + max_iterations=3, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + hook=ReasoningHook(), + stream_progress_deltas=True, + progress_callback=_progress, + )) + + assert result.final_content == "The answer." + # The answer must have streamed AND the dedicated reasoning_content must + # have been emitted exactly once after the stream completed. + assert progress_calls, "answer should have streamed via progress callback" + assert emitted_reasoning == ["step-by-step deduction"] + + +@pytest.mark.asyncio +async def test_runner_does_not_double_emit_when_inline_think_already_streamed(): + """Inline `` blocks streamed incrementally during the answer + stream must not be re-emitted from the final response.""" + from nanobot.agent.hook import AgentHook, AgentHookContext + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock() + provider.supports_progress_deltas = True + emitted_reasoning: list[str] = [] + + async def chat_stream_with_retry(*, on_content_delta=None, **kwargs): + if on_content_delta: + await on_content_delta("working...") + await on_content_delta("The answer.") + return LLMResponse( + content="working...The answer.", + tool_calls=[], + usage={"prompt_tokens": 5, "completion_tokens": 3}, + ) + + provider.chat_stream_with_retry = chat_stream_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + class ReasoningHook(AgentHook): + async def emit_reasoning(self, reasoning_content: str | None) -> None: + if reasoning_content: + emitted_reasoning.append(reasoning_content) + + async def _progress(content: str, **_kwargs): + pass + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + 
initial_messages=[{"role": "user", "content": "question"}], + tools=tools, + model="test-model", + max_iterations=3, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + hook=ReasoningHook(), + stream_progress_deltas=True, + progress_callback=_progress, + )) + + assert result.final_content == "The answer." + assert emitted_reasoning == ["working..."] + + @pytest.mark.asyncio async def test_runner_calls_hooks_in_order(): from nanobot.agent.hook import AgentHook, AgentHookContext diff --git a/tests/cli/test_interactive_retry_wait.py b/tests/cli/test_interactive_retry_wait.py index e693b057c..7ddef1c48 100644 --- a/tests/cli/test_interactive_retry_wait.py +++ b/tests/cli/test_interactive_retry_wait.py @@ -88,3 +88,26 @@ async def test_non_reasoning_progress_not_affected_by_show_reasoning(): assert handled is True assert calls == ["working on it..."] + + +@pytest.mark.asyncio +async def test_reasoning_shown_when_send_progress_disabled(): + """Reasoning display is governed by `show_reasoning` alone, independent + of `send_progress` — the two knobs are orthogonal.""" + calls: list[str] = [] + channels_config = SimpleNamespace( + send_progress=False, send_tool_hints=False, show_reasoning=True, + ) + msg = SimpleNamespace( + content="Let me think about this...", + metadata={"_progress": True, "_reasoning": True}, + ) + + with patch( + "nanobot.cli.commands._print_cli_reasoning", + side_effect=lambda t, th, r=None: calls.append(t), + ): + handled = await commands._maybe_print_interactive_progress(msg, None, channels_config) + + assert handled is True + assert calls == ["Let me think about this..."] diff --git a/tests/utils/test_strip_think.py b/tests/utils/test_strip_think.py index 65d952ad1..f1048f40c 100644 --- a/tests/utils/test_strip_think.py +++ b/tests/utils/test_strip_think.py @@ -1,4 +1,4 @@ -from nanobot.utils.helpers import extract_think, strip_think +from nanobot.utils.helpers import extract_reasoning, extract_think, strip_think class TestStripThinkTag: @@ -225,3 
+225,49 @@ squares = [x**2 for x in range(10)] assert "List comprehensions in Python" in clean assert "" not in clean assert "" not in clean + + +class TestExtractReasoning: + """Single source of truth for reasoning extraction across all providers.""" + + def test_prefers_reasoning_content_and_strips_inline_think(self): + # Dedicated field wins; inline tags are still scrubbed from content. + reasoning, content = extract_reasoning( + "dedicated", + None, + "inlinevisible answer", + ) + assert reasoning == "dedicated" + assert content == "visible answer" + + def test_falls_back_to_thinking_blocks(self): + reasoning, content = extract_reasoning( + None, + [ + {"type": "thinking", "thinking": "step 1"}, + {"type": "thinking", "thinking": "step 2"}, + {"type": "redacted_thinking"}, + ], + "hello", + ) + assert reasoning == "step 1\n\nstep 2" + assert content == "hello" + + def test_falls_back_to_inline_think_tags(self): + reasoning, content = extract_reasoning( + None, None, "plananswer" + ) + assert reasoning == "plan" + assert content == "answer" + + def test_no_reasoning_returns_none(self): + reasoning, content = extract_reasoning(None, None, "plain answer") + assert reasoning is None + assert content == "plain answer" + + def test_empty_thinking_blocks_falls_through_to_inline(self): + reasoning, content = extract_reasoning( + None, [], "plananswer" + ) + assert reasoning == "plan" + assert content == "answer" From a6b059d37924059eef322261fcaa8340a6528fa4 Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Wed, 13 May 2026 06:27:53 +0000 Subject: [PATCH 05/17] refactor(reasoning): make channel plugins own reasoning rendering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reasoning was being shipped to every channel as a generic progress message with a `_reasoning: true` flag. Two problems with that: 1. Channels without a low-emphasis UI primitive (Telegram, Slack, Discord, Feishu...) 
would dump raw model thoughts as ordinary replies, polluting the conversation. 2. The agent loop double-gated by inspecting `channels_config`, which coupled the loop to display policy. Treat reasoning as its own plugin action — `BaseChannel.send_reasoning` defaults to a documented no-op; channels that have a fitting affordance override. ChannelManager routes `_reasoning` outbounds to that method only when the channel opts in via `show_reasoning` (camelCase alias `showReasoning` mirrors `sendProgress`). Plugins that don't override silently drop reasoning — "no fit, no leak" is the contract. Reference implementation lands for WebSocket / WebUI: a new `kind: "reasoning"` frame, parked on the active assistant bubble as a collapsible `Thinking` group above the answer. CLI keeps its existing direct path (it doesn't go through the bus). `ChannelsConfig.show_reasoning` flips to `true` by default — only adapted channels surface anything, others stay quiet. Loop net diff is -3 lines: the `channels_config.show_reasoning` check moves out, leaving emit_reasoning a one-liner that publishes and trusts the channel to decide. 
Co-authored-by: Cursor --- docs/configuration.md | 2 +- nanobot/agent/loop.py | 11 +- nanobot/channels/base.py | 13 ++ nanobot/channels/manager.py | 20 +- nanobot/channels/websocket.py | 24 +++ nanobot/config/schema.py | 2 +- .../test_channel_manager_reasoning.py | 183 ++++++++++++++++++ tests/channels/test_websocket_channel.py | 54 ++++++ webui/src/components/MessageBubble.tsx | 60 +++++- webui/src/hooks/useNanobotStream.ts | 35 +++- webui/src/i18n/locales/en/common.json | 1 + webui/src/i18n/locales/zh-CN/common.json | 1 + webui/src/lib/types.ts | 6 +- webui/src/tests/message-bubble.test.tsx | 33 ++++ webui/src/tests/useNanobotStream.test.tsx | 72 +++++++ 15 files changed, 504 insertions(+), 13 deletions(-) create mode 100644 tests/channels/test_channel_manager_reasoning.py diff --git a/docs/configuration.md b/docs/configuration.md index 85091d1f7..ed5a534cf 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -743,7 +743,7 @@ Global settings that apply to all channels. Configure under the `channels` secti |---------|---------|-------------| | `sendProgress` | `true` | Stream agent's text progress to the channel | | `sendToolHints` | `false` | Stream tool-call hints (e.g. `read_file("…")`) | -| `showReasoning` | `false` | Surface model reasoning/thinking content (DeepSeek-R1 `reasoning_content`, Anthropic `thinking_blocks`, inline `` tags). Independent of `sendProgress`. | +| `showReasoning` | `true` | Allow channels to surface model reasoning/thinking content (DeepSeek-R1 `reasoning_content`, Anthropic `thinking_blocks`, inline `` tags). The setting is a plugin opt-in: even when `true`, a channel only renders reasoning if it overrides `send_reasoning()`. Currently surfaced on CLI and WebSocket/WebUI; other channels (Telegram, Slack, Discord, ...) keep it as a silent no-op until their bubble UI is adapted. Independent of `sendProgress`. 
| | `sendMaxRetries` | `3` | Max delivery attempts per outbound message, including the initial send (0-10 configured, minimum 1 actual attempt) | | `transcriptionProvider` | `"groq"` | Voice transcription backend: `"groq"` (free tier, default) or `"openai"`. API key is auto-resolved from the matching provider config. | | `transcriptionLanguage` | `null` | Optional ISO-639-1 language hint for audio transcription, e.g. `"en"`, `"ko"`, `"ja"`. | diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index c7091a5f6..e7b045f01 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -147,10 +147,13 @@ class _LoopHook(AgentHook): ) async def emit_reasoning(self, reasoning_content: str | None) -> None: - """Send reasoning/thinking content as progress before the main answer.""" - ch = self._loop.channels_config - if not ch or not ch.show_reasoning: - return + """Publish reasoning content; channel plugins decide whether to render. + + The loop is intentionally not the gate: ``ChannelsConfig.show_reasoning`` + is a default that ``ChannelManager`` and ``BaseChannel.send_reasoning`` + consult per channel. A channel without a low-emphasis UI primitive + keeps the base no-op and the content drops at the dispatch boundary. + """ if self._on_progress and reasoning_content: await self._on_progress(reasoning_content, reasoning=True) diff --git a/nanobot/channels/base.py b/nanobot/channels/base.py index 087677494..c82003d88 100644 --- a/nanobot/channels/base.py +++ b/nanobot/channels/base.py @@ -28,6 +28,7 @@ class BaseChannel(ABC): transcription_language: str | None = None send_progress: bool = True send_tool_hints: bool = False + show_reasoning: bool = True def __init__(self, config: Any, bus: MessageBus): """ @@ -120,6 +121,18 @@ class BaseChannel(ABC): """ pass + async def send_reasoning(self, msg: OutboundMessage) -> None: + """Surface model reasoning/thinking content. + + Default is no-op. 
Channels with a native low-emphasis primitive + (Slack context block, Telegram expandable blockquote, Discord + subtext, WebUI italic bubble, ...) override to render reasoning + as a subordinate trace. Channels without a suitable affordance + keep this no-op: silently dropping is better than leaking raw + model thoughts as regular conversational messages. + """ + return + @property def supports_streaming(self) -> bool: """True when config enables streaming AND this subclass implements send_delta.""" diff --git a/nanobot/channels/manager.py b/nanobot/channels/manager.py index 1d92bb879..abf9bf043 100644 --- a/nanobot/channels/manager.py +++ b/nanobot/channels/manager.py @@ -36,6 +36,7 @@ _SEND_RETRY_DELAYS = (1, 2, 4) _BOOL_CAMEL_ALIASES: dict[str, str] = { "send_progress": "sendProgress", "send_tool_hints": "sendToolHints", + "show_reasoning": "showReasoning", } class ChannelManager: @@ -104,6 +105,9 @@ class ChannelManager: channel.send_tool_hints = self._resolve_bool_override( section, "send_tool_hints", self.config.channels.send_tool_hints, ) + channel.show_reasoning = self._resolve_bool_override( + section, "show_reasoning", self.config.channels.show_reasoning, + ) self.channels[name] = channel logger.info("{} channel enabled", cls.display_name) except Exception as e: @@ -279,6 +283,18 @@ class ChannelManager: timeout=1.0 ) + if msg.metadata.get("_reasoning"): + # Reasoning rides its own plugin channel: only delivered when + # the destination channel both opts in (``show_reasoning``) + # and overrides ``send_reasoning``. Channels without a + # low-emphasis UI primitive keep the base no-op and the + # content silently drops here rather than leak as a + # conversational reply. 
+ channel = self.channels.get(msg.channel) + if channel is not None and channel.show_reasoning: + await self._send_with_retry(channel, msg) + continue + if msg.metadata.get("_progress"): if msg.metadata.get("_tool_hint") and not self._should_send_progress( msg.channel, tool_hint=True, @@ -329,7 +345,9 @@ class ChannelManager: @staticmethod async def _send_once(channel: BaseChannel, msg: OutboundMessage) -> None: """Send one outbound message without retry policy.""" - if msg.metadata.get("_stream_delta") or msg.metadata.get("_stream_end"): + if msg.metadata.get("_reasoning"): + await channel.send_reasoning(msg) + elif msg.metadata.get("_stream_delta") or msg.metadata.get("_stream_end"): await channel.send_delta(msg.chat_id, msg.content, msg.metadata) elif not msg.metadata.get("_streamed"): await channel.send(msg) diff --git a/nanobot/channels/websocket.py b/nanobot/channels/websocket.py index 76ca513d0..bba68397f 100644 --- a/nanobot/channels/websocket.py +++ b/nanobot/channels/websocket.py @@ -1487,6 +1487,30 @@ class WebSocketChannel(BaseChannel): for connection in conns: await self._safe_send_to(connection, raw, label=" ") + async def send_reasoning(self, msg: OutboundMessage) -> None: + """Stream model reasoning as a subordinate trace frame. + + Renders as ``kind=reasoning`` alongside the existing ``tool_hint`` / + ``progress`` frames; the WebUI mounts these on the active assistant + bubble rather than as a conversational reply. 
+ """ + conns = list(self._subs.get(msg.chat_id, ())) + if not conns: + return + if not msg.content: + return + payload: dict[str, Any] = { + "event": "message", + "chat_id": msg.chat_id, + "text": msg.content, + "kind": "reasoning", + } + if msg.reply_to: + payload["reply_to"] = msg.reply_to + raw = json.dumps(payload, ensure_ascii=False) + for connection in conns: + await self._safe_send_to(connection, raw, label=" reasoning ") + async def send_delta( self, chat_id: str, diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index 72110eedd..ff7454d71 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -35,7 +35,7 @@ class ChannelsConfig(Base): send_progress: bool = True # stream agent's text progress to the channel send_tool_hints: bool = False # stream tool-call hints (e.g. read_file("…")) - show_reasoning: bool = False # show model reasoning/thinking content + show_reasoning: bool = True # surface model reasoning when channel implements it send_max_retries: int = Field(default=3, ge=0, le=10) # Max delivery attempts (initial send included) transcription_provider: str = "groq" # Voice transcription backend: "groq" or "openai" transcription_language: str | None = Field(default=None, pattern=r"^[a-z]{2,3}$") # Optional ISO-639-1 hint for audio transcription diff --git a/tests/channels/test_channel_manager_reasoning.py b/tests/channels/test_channel_manager_reasoning.py new file mode 100644 index 000000000..2200f4be2 --- /dev/null +++ b/tests/channels/test_channel_manager_reasoning.py @@ -0,0 +1,183 @@ +"""Tests for ChannelManager routing of model reasoning content. + +Reasoning is delivered as a separate plugin action (``send_reasoning``) +rather than a metadata flag on a regular outbound. The manager routes +``_reasoning`` messages only to channels that opt in via +``channel.show_reasoning``; channels without a low-emphasis UI primitive +keep the base no-op and the content silently drops at dispatch. 
+""" + +from __future__ import annotations + +from unittest.mock import AsyncMock + +import pytest + +from nanobot.bus.events import OutboundMessage +from nanobot.bus.queue import MessageBus +from nanobot.channels.base import BaseChannel +from nanobot.channels.manager import ChannelManager +from nanobot.config.schema import Config + + +class _MockChannel(BaseChannel): + name = "mock" + display_name = "Mock" + + def __init__(self, config, bus): + super().__init__(config, bus) + self._send_mock = AsyncMock() + self._send_reasoning_mock = AsyncMock() + + async def start(self): # pragma: no cover - not exercised + pass + + async def stop(self): # pragma: no cover - not exercised + pass + + async def send(self, msg): + return await self._send_mock(msg) + + async def send_reasoning(self, msg): + return await self._send_reasoning_mock(msg) + + +@pytest.fixture +def manager() -> ChannelManager: + mgr = ChannelManager(Config(), MessageBus()) + mgr.channels["mock"] = _MockChannel({}, mgr.bus) + return mgr + + +@pytest.mark.asyncio +async def test_reasoning_routes_to_send_reasoning_not_send(manager): + channel = manager.channels["mock"] + msg = OutboundMessage( + channel="mock", + chat_id="c1", + content="step-by-step thinking", + metadata={"_progress": True, "_reasoning": True}, + ) + await manager._send_once(channel, msg) + channel._send_reasoning_mock.assert_awaited_once_with(msg) + channel._send_mock.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_dispatch_drops_reasoning_when_channel_opts_out(manager): + channel = manager.channels["mock"] + channel.show_reasoning = False + msg = OutboundMessage( + channel="mock", + chat_id="c1", + content="hidden thinking", + metadata={"_progress": True, "_reasoning": True}, + ) + await manager.bus.publish_outbound(msg) + + pumped = await _pump_one(manager) + + assert pumped is True + channel._send_reasoning_mock.assert_not_awaited() + channel._send_mock.assert_not_awaited() + + +@pytest.mark.asyncio +async def 
test_dispatch_delivers_reasoning_when_channel_opts_in(manager): + channel = manager.channels["mock"] + channel.show_reasoning = True + msg = OutboundMessage( + channel="mock", + chat_id="c1", + content="visible thinking", + metadata={"_progress": True, "_reasoning": True}, + ) + await manager.bus.publish_outbound(msg) + + pumped = await _pump_one(manager) + + assert pumped is True + channel._send_reasoning_mock.assert_awaited_once() + delivered = channel._send_reasoning_mock.await_args.args[0] + assert delivered.content == "visible thinking" + + +@pytest.mark.asyncio +async def test_dispatch_silently_drops_reasoning_for_unknown_channel(manager): + msg = OutboundMessage( + channel="ghost", + chat_id="c1", + content="nobody home", + metadata={"_progress": True, "_reasoning": True}, + ) + await manager.bus.publish_outbound(msg) + + pumped = await _pump_one(manager) + + assert pumped is True + # Mock channel must not receive anything destined for a different channel. + manager.channels["mock"]._send_reasoning_mock.assert_not_awaited() + manager.channels["mock"]._send_mock.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_base_channel_send_reasoning_is_noop_safe(): + """Plugins that don't override `send_reasoning` must not blow up.""" + + class _Plain(BaseChannel): + name = "plain" + display_name = "Plain" + + async def start(self): # pragma: no cover + pass + + async def stop(self): # pragma: no cover + pass + + async def send(self, msg): # pragma: no cover + pass + + channel = _Plain({}, MessageBus()) + # No exception, returns None. 
+ assert await channel.send_reasoning( + OutboundMessage(channel="plain", chat_id="c", content="x", metadata={}) + ) is None + + +@pytest.mark.asyncio +async def test_reasoning_routing_does_not_consult_send_progress(manager): + """`show_reasoning` is orthogonal to `send_progress` — turning off + progress streaming must not silence reasoning.""" + channel = manager.channels["mock"] + channel.send_progress = False + channel.show_reasoning = True + msg = OutboundMessage( + channel="mock", + chat_id="c1", + content="still surfaces", + metadata={"_progress": True, "_reasoning": True}, + ) + await manager.bus.publish_outbound(msg) + + pumped = await _pump_one(manager) + + assert pumped is True + channel._send_reasoning_mock.assert_awaited_once() + + +async def _pump_one(manager: ChannelManager) -> bool: + """Drive the dispatcher for exactly one message, then cancel.""" + import asyncio + + task = asyncio.create_task(manager._dispatch_outbound()) + # Yield control until the queue drains. + for _ in range(50): + await asyncio.sleep(0.01) + if manager.bus.outbound.qsize() == 0: + break + task.cancel() + try: + await task + except asyncio.CancelledError: + pass + return True diff --git a/tests/channels/test_websocket_channel.py b/tests/channels/test_websocket_channel.py index 92b61f7d6..0e682ed0a 100644 --- a/tests/channels/test_websocket_channel.py +++ b/tests/channels/test_websocket_channel.py @@ -358,6 +358,60 @@ async def test_send_delta_emits_delta_and_stream_end() -> None: assert second["stream_id"] == "sid" +@pytest.mark.asyncio +async def test_send_reasoning_emits_reasoning_kind_frame() -> None: + bus = MagicMock() + channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus) + mock_ws = AsyncMock() + channel._attach(mock_ws, "chat-1") + + await channel.send_reasoning(OutboundMessage( + channel="websocket", + chat_id="chat-1", + content="step-by-step thinking", + metadata={"_progress": True, "_reasoning": True}, + )) + + mock_ws.send.assert_awaited_once() 
+ payload = json.loads(mock_ws.send.await_args.args[0]) + assert payload["event"] == "message" + assert payload["chat_id"] == "chat-1" + assert payload["text"] == "step-by-step thinking" + assert payload["kind"] == "reasoning" + + +@pytest.mark.asyncio +async def test_send_reasoning_drops_empty_content() -> None: + """Empty reasoning emits nothing — keeps the frontend bubble clean.""" + bus = MagicMock() + channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus) + mock_ws = AsyncMock() + channel._attach(mock_ws, "chat-1") + + await channel.send_reasoning(OutboundMessage( + channel="websocket", + chat_id="chat-1", + content="", + metadata={"_reasoning": True}, + )) + + mock_ws.send.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_send_reasoning_without_subscribers_is_noop() -> None: + bus = MagicMock() + channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus) + + await channel.send_reasoning(OutboundMessage( + channel="websocket", + chat_id="unattached", + content="thinking", + metadata={"_reasoning": True}, + )) + # No subscribers, no exception, no send. 
+ + @pytest.mark.asyncio async def test_send_turn_end_emits_turn_end_event() -> None: bus = MagicMock() diff --git a/webui/src/components/MessageBubble.tsx b/webui/src/components/MessageBubble.tsx index 3bd580567..556460824 100644 --- a/webui/src/components/MessageBubble.tsx +++ b/webui/src/components/MessageBubble.tsx @@ -1,5 +1,5 @@ -import { useCallback, useEffect, useRef, useState } from "react"; -import { Check, ChevronRight, Copy, FileIcon, ImageIcon, PlaySquare, Wrench } from "lucide-react"; +import { useCallback, useEffect, useMemo, useRef, useState } from "react"; +import { Check, ChevronRight, Copy, FileIcon, ImageIcon, PlaySquare, Sparkles, Wrench } from "lucide-react"; import { useTranslation } from "react-i18next"; import { ImageLightbox } from "@/components/ImageLightbox"; @@ -85,12 +85,14 @@ export function MessageBubble({ message }: MessageBubbleProps) { const empty = message.content.trim().length === 0; const media = message.media ?? []; + const reasoning = message.role === "assistant" ? message.reasoning ?? [] : []; const showAssistantActions = message.role === "assistant" && !message.isStreaming && !empty; return (
- {empty && message.isStreaming ? ( + {reasoning.length > 0 ? : null} + {empty && message.isStreaming && reasoning.length === 0 ? ( - ) : ( + ) : empty && message.isStreaming ? null : ( <> {message.content} {message.isStreaming && } @@ -433,3 +435,53 @@ function TraceGroup({ message, animClass }: TraceGroupProps) {
); } + +interface ReasoningBubbleProps { + lines: string[]; +} + +/** + * Subordinate "thinking" trace shown above an assistant turn. Mirrors the + * CLI's italic dim ``ChevronRight`` row visually; collapsible because + * reasoning from models like DeepSeek-R1 / o-series can run long. Defaults + * to expanded while the answer is still streaming (so the user sees the + * model "thinking out loud"), but the toggle persists across rerenders. + */ +function ReasoningBubble({ lines }: ReasoningBubbleProps) { + const { t } = useTranslation(); + const [open, setOpen] = useState(true); + const text = useMemo(() => lines.join("\n\n"), [lines]); + return ( +
+ + {open && ( +
+ {text} +
+ )} +
+ ); +} diff --git a/webui/src/hooks/useNanobotStream.ts b/webui/src/hooks/useNanobotStream.ts index 8ec1a9ac4..ee460cf56 100644 --- a/webui/src/hooks/useNanobotStream.ts +++ b/webui/src/hooks/useNanobotStream.ts @@ -183,10 +183,43 @@ export function useNanobotStream( if (ev.event === "message") { if ( suppressStreamUntilTurnEndRef.current && - (ev.kind === "tool_hint" || ev.kind === "progress") + (ev.kind === "tool_hint" || ev.kind === "progress" || ev.kind === "reasoning") ) { return; } + // Model reasoning rides its own channel: stash it on the next + // assistant turn so the bubble renders it as a subordinate trace. + // If the assistant message hasn't materialized yet (typical, since + // reasoning fires before tool calls/answers), park it on a sentinel + // pending row that the next assistant message absorbs. + if (ev.kind === "reasoning") { + const line = ev.text; + if (!line) return; + setMessages((prev) => { + for (let i = prev.length - 1; i >= 0; i -= 1) { + const candidate = prev[i]; + if (candidate.role === "assistant" && candidate.kind !== "trace") { + const merged: UIMessage = { + ...candidate, + reasoning: [...(candidate.reasoning ?? []), line], + }; + return [...prev.slice(0, i), merged, ...prev.slice(i + 1)]; + } + } + return [ + ...prev, + { + id: crypto.randomUUID(), + role: "assistant", + content: "", + isStreaming: true, + reasoning: [line], + createdAt: Date.now(), + }, + ]; + }); + return; + } // Intermediate agent breadcrumbs (tool-call hints, raw progress). // Attach them to the last trace row if it was the last emitted item // so a sequence of calls collapses into one compact trace group. 
diff --git a/webui/src/i18n/locales/en/common.json b/webui/src/i18n/locales/en/common.json index 4cf1b6391..1f6eb7b54 100644 --- a/webui/src/i18n/locales/en/common.json +++ b/webui/src/i18n/locales/en/common.json @@ -332,6 +332,7 @@ "assistantTyping": "Assistant is typing", "toolSingle": "Using a tool", "toolMany": "Used {{count}} tools", + "reasoning": "Thinking", "imageAttachment": "Image attachment", "copyReply": "Copy reply", "copiedReply": "Copied reply" diff --git a/webui/src/i18n/locales/zh-CN/common.json b/webui/src/i18n/locales/zh-CN/common.json index fed932f29..662a5f7bd 100644 --- a/webui/src/i18n/locales/zh-CN/common.json +++ b/webui/src/i18n/locales/zh-CN/common.json @@ -320,6 +320,7 @@ "assistantTyping": "助手正在输入", "toolSingle": "正在使用工具", "toolMany": "已使用 {{count}} 个工具", + "reasoning": "思考中", "imageAttachment": "图片附件", "copyReply": "复制回复", "copiedReply": "已复制回复" diff --git a/webui/src/lib/types.ts b/webui/src/lib/types.ts index 5e7dc9288..0338b75f3 100644 --- a/webui/src/lib/types.ts +++ b/webui/src/lib/types.ts @@ -44,6 +44,10 @@ export interface UIMessage { images?: UIImage[]; /** Signed or local UI-renderable media attachments. */ media?: UIMediaAttachment[]; + /** Assistant turn: model reasoning / thinking content collected from + * `kind: "reasoning"` frames. Each entry is one emit cycle, joined with + * blank lines on render. */ + reasoning?: string[]; } export interface ChatSummary { @@ -141,7 +145,7 @@ export type InboundEvent = media_urls?: Array<{ url: string; name?: string }>; /** Present when the frame is an agent breadcrumb (e.g. tool hint, * generic progress line) rather than a conversational reply. 
*/ - kind?: "tool_hint" | "progress"; + kind?: "tool_hint" | "progress" | "reasoning"; } | { event: "delta"; diff --git a/webui/src/tests/message-bubble.test.tsx b/webui/src/tests/message-bubble.test.tsx index 35cdaed40..77608b121 100644 --- a/webui/src/tests/message-bubble.test.tsx +++ b/webui/src/tests/message-bubble.test.tsx @@ -103,6 +103,39 @@ describe("MessageBubble", () => { expect(container.querySelector("video[controls]")).toBeInTheDocument(); }); + it("surfaces reasoning content above the assistant answer when provided", () => { + const message: UIMessage = { + id: "a-reasoning", + role: "assistant", + content: "The answer is 42.", + createdAt: Date.now(), + reasoning: ["Step 1: parse intent.", "Step 2: compute."], + }; + + render(); + + expect(screen.getByText("Thinking")).toBeInTheDocument(); + expect(screen.getByText(/Step 1: parse intent\./)).toBeInTheDocument(); + expect(screen.getByText(/Step 2: compute\./)).toBeInTheDocument(); + expect(screen.getByText("The answer is 42.")).toBeInTheDocument(); + }); + + it("collapses the reasoning section when toggled", () => { + const message: UIMessage = { + id: "a-reasoning-collapse", + role: "assistant", + content: "done", + createdAt: Date.now(), + reasoning: ["hidden after toggle"], + }; + + render(); + + expect(screen.getByText("hidden after toggle")).toBeInTheDocument(); + fireEvent.click(screen.getByRole("button", { name: /thinking/i })); + expect(screen.queryByText("hidden after toggle")).not.toBeInTheDocument(); + }); + it("renders assistant image media as a larger generated result", () => { const message: UIMessage = { id: "a-image", diff --git a/webui/src/tests/useNanobotStream.test.tsx b/webui/src/tests/useNanobotStream.test.tsx index 60e6ada62..7fb94063c 100644 --- a/webui/src/tests/useNanobotStream.test.tsx +++ b/webui/src/tests/useNanobotStream.test.tsx @@ -113,6 +113,78 @@ describe("useNanobotStream", () => { expect(result.current.messages[1].kind).toBeUndefined(); }); + it("parks reasoning 
frames on a placeholder assistant message until the answer arrives", () => { + const fake = fakeClient(); + const { result } = renderHook(() => useNanobotStream("chat-r", EMPTY_MESSAGES), { + wrapper: wrap(fake.client), + }); + + act(() => { + fake.emit("chat-r", { + event: "message", + chat_id: "chat-r", + text: "Let me think step by step.", + kind: "reasoning", + }); + fake.emit("chat-r", { + event: "message", + chat_id: "chat-r", + text: "First, decompose the request.", + kind: "reasoning", + }); + }); + + expect(result.current.messages).toHaveLength(1); + expect(result.current.messages[0].role).toBe("assistant"); + expect(result.current.messages[0].reasoning).toEqual([ + "Let me think step by step.", + "First, decompose the request.", + ]); + }); + + it("attaches reasoning to the latest assistant turn rather than spawning a new one", () => { + const fake = fakeClient(); + const { result } = renderHook(() => useNanobotStream("chat-r2", EMPTY_MESSAGES), { + wrapper: wrap(fake.client), + }); + + act(() => { + fake.emit("chat-r2", { + event: "message", + chat_id: "chat-r2", + text: "The answer is 42.", + }); + fake.emit("chat-r2", { + event: "message", + chat_id: "chat-r2", + text: "Reasoning surfaced post-hoc.", + kind: "reasoning", + }); + }); + + expect(result.current.messages).toHaveLength(1); + expect(result.current.messages[0].content).toBe("The answer is 42."); + expect(result.current.messages[0].reasoning).toEqual(["Reasoning surfaced post-hoc."]); + }); + + it("ignores empty reasoning frames", () => { + const fake = fakeClient(); + const { result } = renderHook(() => useNanobotStream("chat-r3", EMPTY_MESSAGES), { + wrapper: wrap(fake.client), + }); + + act(() => { + fake.emit("chat-r3", { + event: "message", + chat_id: "chat-r3", + text: "", + kind: "reasoning", + }); + }); + + expect(result.current.messages).toHaveLength(0); + }); + it("attaches assistant media_urls to complete messages", () => { const fake = fakeClient(); const { result } = renderHook(() 
=> useNanobotStream("chat-m", EMPTY_MESSAGES), { From 458b4ba235b40e00139386a2c767670b91384903 Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Wed, 13 May 2026 07:13:43 +0000 Subject: [PATCH 06/17] feat(reasoning): stream reasoning content as a first-class channel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reasoning now flows as its own stream — symmetric to the answer's ``delta`` / ``stream_end`` pair — instead of being shipped as one oversized progress message. This lets WebUI render a live "Thinking…" bubble that updates in place, then auto-collapses when the stream closes. Other channels remain plugin no-ops by default. ## Protocol New metadata: ``_reasoning_delta`` (chunk) and ``_reasoning_end`` (close marker). ChannelManager routes both to the dedicated plugin hooks below; the legacy one-shot ``_reasoning`` is kept for back-compat and BaseChannel expands it into a single delta + end pair so plugins only ever implement the streaming primitives. WebSocket emits two new events: - ``reasoning_delta`` (event, chat_id, text, optional stream_id) - ``reasoning_end`` (event, chat_id, optional stream_id) ## BaseChannel surface - ``send_reasoning_delta(chat_id, delta, metadata)`` — no-op default - ``send_reasoning_end(chat_id, metadata)`` — no-op default - ``send_reasoning(msg)`` — back-compat wrapper, base impl forwards to the streaming primitives A channel adds reasoning support by overriding the two streaming primitives. Telegram / Slack / Discord / Feishu / WeChat / Matrix keep the base no-ops until their bubble UIs are adapted; reasoning silently drops at dispatch, never as a stray text message. ## AgentHook Adds ``emit_reasoning_end`` to the hook lifecycle. 
``_LoopHook`` tracks whether a reasoning segment is open and closes it on: - the first answer delta arriving (so the UI locks the bubble before the answer renders below), - ``on_stream_end``, - one-shot ``reasoning_content`` / ``thinking_blocks`` after a single non-streaming response. ## WebUI - ``UIMessage.reasoning`` is now a single accumulated string with a companion ``reasoningStreaming`` flag. - ``useNanobotStream`` consumes ``reasoning_delta`` / ``reasoning_end``; legacy ``kind: "reasoning"`` is auto-translated to a delta + end. - New ``ReasoningBubble``: shimmer header + auto-expanded while streaming, collapses to a clickable "Thinking" pill once closed, respects ``prefers-reduced-motion``. - Answer deltas adopt the reasoning placeholder so the bubble and the answer share one assistant row. ## Tests - ``tests/channels/test_channel_manager_reasoning.py`` — manager routes delta + end, drops on channel opt-out, expands one-shot back-compat. - ``tests/channels/test_websocket_channel.py`` — new ``reasoning_delta`` / ``reasoning_end`` frames, empty-chunk safety, no-subscriber safety, back-compat expansion. - ``tests/agent/test_runner_reasoning.py`` — runner closes the segment on streaming answer start and after one-shot reasoning. - WebUI ``useNanobotStream`` + ``message-bubble`` cover the new protocol and the shimmer styling. ## Docs ``docs/configuration.md`` and ``docs/websocket.md`` document the new events and the plugin contract. 
Co-authored-by: Cursor --- docs/configuration.md | 2 +- docs/websocket.md | 23 +++ nanobot/agent/hook.py | 11 ++ nanobot/agent/loop.py | 36 +++- nanobot/agent/runner.py | 18 +- nanobot/channels/base.py | 45 ++++- nanobot/channels/manager.py | 28 ++- nanobot/channels/websocket.py | 60 ++++-- tests/agent/test_runner_reasoning.py | 42 ++++ .../test_channel_manager_reasoning.py | 139 +++++++++----- tests/channels/test_websocket_channel.py | 63 ++++-- webui/src/components/MessageBubble.tsx | 57 ++++-- webui/src/globals.css | 28 +++ webui/src/hooks/useNanobotStream.ts | 180 +++++++++++++----- webui/src/i18n/locales/en/common.json | 1 + webui/src/i18n/locales/zh-CN/common.json | 3 +- webui/src/lib/types.ts | 22 ++- webui/src/tests/message-bubble.test.tsx | 42 ++-- webui/src/tests/useNanobotStream.test.tsx | 70 ++++--- 19 files changed, 649 insertions(+), 221 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index ed5a534cf..0123017d2 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -743,7 +743,7 @@ Global settings that apply to all channels. Configure under the `channels` secti |---------|---------|-------------| | `sendProgress` | `true` | Stream agent's text progress to the channel | | `sendToolHints` | `false` | Stream tool-call hints (e.g. `read_file("…")`) | -| `showReasoning` | `true` | Allow channels to surface model reasoning/thinking content (DeepSeek-R1 `reasoning_content`, Anthropic `thinking_blocks`, inline `` tags). The setting is a plugin opt-in: even when `true`, a channel only renders reasoning if it overrides `send_reasoning()`. Currently surfaced on CLI and WebSocket/WebUI; other channels (Telegram, Slack, Discord, ...) keep it as a silent no-op until their bubble UI is adapted. Independent of `sendProgress`. | +| `showReasoning` | `true` | Allow channels to surface model reasoning/thinking content (DeepSeek-R1 `reasoning_content`, Anthropic `thinking_blocks`, inline `` tags). 
Reasoning flows as a dedicated stream with `_reasoning_delta` / `_reasoning_end` markers — channels override `send_reasoning_delta` / `send_reasoning_end` to render in-place updates. Even with `true`, channels without those overrides stay no-op silently. Currently surfaced on CLI and WebSocket/WebUI (italic shimmer header, auto-collapses after the stream ends); Telegram / Slack / Discord / Feishu / WeChat / Matrix keep the base no-op until their bubble UI is adapted. Independent of `sendProgress`. | | `sendMaxRetries` | `3` | Max delivery attempts per outbound message, including the initial send (0-10 configured, minimum 1 actual attempt) | | `transcriptionProvider` | `"groq"` | Voice transcription backend: `"groq"` (free tier, default) or `"openai"`. API key is auto-resolved from the matching provider config. | | `transcriptionLanguage` | `null` | Optional ISO-639-1 language hint for audio transcription, e.g. `"en"`, `"ko"`, `"ja"`. | diff --git a/docs/websocket.md b/docs/websocket.md index 556bb5bb6..d6a816ac1 100644 --- a/docs/websocket.md +++ b/docs/websocket.md @@ -128,6 +128,29 @@ All frames are JSON text. Each message has an `event` field. } ``` +**`reasoning_delta`** — incremental model reasoning / thinking chunk for the active assistant turn. Mirrors `delta` but targets the reasoning bubble above the answer rather than the answer body: + +```json +{ + "event": "reasoning_delta", + "chat_id": "uuid-v4", + "text": "Let me decompose ", + "stream_id": "r1" +} +``` + +**`reasoning_end`** — close marker for the active reasoning stream. WebUI uses this to lock the in-place bubble and switch from the shimmer header to a static collapsed state: + +```json +{ + "event": "reasoning_end", + "chat_id": "uuid-v4", + "stream_id": "r1" +} +``` + +Reasoning frames only flow when the channel's `showReasoning` is `true` (default) and the model returns reasoning content (DeepSeek-R1 / Kimi / MiMo / OpenAI reasoning models, Anthropic extended thinking, or inline `` / `` tags). 
Models without reasoning produce zero `reasoning_delta` frames. + **`runtime_model_updated`** — broadcast when the gateway runtime model changes, for example after `/model `: ```json diff --git a/nanobot/agent/hook.py b/nanobot/agent/hook.py index 86775742d..5b6fed445 100644 --- a/nanobot/agent/hook.py +++ b/nanobot/agent/hook.py @@ -52,6 +52,14 @@ class AgentHook: async def emit_reasoning(self, reasoning_content: str | None) -> None: pass + async def emit_reasoning_end(self) -> None: + """Mark the end of an in-flight reasoning stream. + + Hooks that buffer ``emit_reasoning`` chunks (for in-place UI updates) + flush and freeze the rendered group here. One-shot hooks ignore. + """ + pass + async def after_iteration(self, context: AgentHookContext) -> None: pass @@ -102,6 +110,9 @@ class CompositeHook(AgentHook): async def emit_reasoning(self, reasoning_content: str | None) -> None: await self._for_each_hook_safe("emit_reasoning", reasoning_content) + async def emit_reasoning_end(self) -> None: + await self._for_each_hook_safe("emit_reasoning_end") + async def after_iteration(self, context: AgentHookContext) -> None: await self._for_each_hook_safe("after_iteration", context) diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index e7b045f01..7897f89dd 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -87,6 +87,7 @@ class _LoopHook(AgentHook): self._session_key = session_key self._stream_buf = "" self._think_extractor = IncrementalThinkExtractor() + self._reasoning_open = False def wants_streaming(self) -> bool: return self._on_stream is not None @@ -102,10 +103,15 @@ class _LoopHook(AgentHook): if await self._think_extractor.feed(self._stream_buf, self.emit_reasoning): context.streamed_reasoning = True - if incremental and self._on_stream: - await self._on_stream(incremental) + if incremental: + # Answer text has started — close any open reasoning segment so + # the UI can lock the bubble before the answer renders below it. 
+ await self.emit_reasoning_end() + if self._on_stream: + await self._on_stream(incremental) async def on_stream_end(self, context: AgentHookContext, *, resuming: bool) -> None: + await self.emit_reasoning_end() if self._on_stream_end: await self._on_stream_end(resuming=resuming) self._stream_buf = "" @@ -147,16 +153,27 @@ class _LoopHook(AgentHook): ) async def emit_reasoning(self, reasoning_content: str | None) -> None: - """Publish reasoning content; channel plugins decide whether to render. + """Publish a reasoning chunk; channel plugins decide whether to render. - The loop is intentionally not the gate: ``ChannelsConfig.show_reasoning`` - is a default that ``ChannelManager`` and ``BaseChannel.send_reasoning`` - consult per channel. A channel without a low-emphasis UI primitive - keeps the base no-op and the content drops at the dispatch boundary. + Each call is one delta in a streaming session. ``emit_reasoning_end`` + closes the segment. The loop is intentionally not the gate: + ``ChannelsConfig.show_reasoning`` is a default that ``ChannelManager`` + and ``BaseChannel.send_reasoning_delta`` consult per channel — a + channel without a low-emphasis UI primitive keeps the base no-op + and the content drops at the dispatch boundary. 
""" if self._on_progress and reasoning_content: + self._reasoning_open = True await self._on_progress(reasoning_content, reasoning=True) + async def emit_reasoning_end(self) -> None: + """Close the current reasoning stream segment, if any was open.""" + if self._reasoning_open and self._on_progress: + self._reasoning_open = False + await self._on_progress("", reasoning_end=True) + else: + self._reasoning_open = False + async def after_iteration(self, context: AgentHookContext) -> None: if ( self._on_progress @@ -665,12 +682,15 @@ class AgentLoop: tool_hint: bool = False, tool_events: list[dict[str, Any]] | None = None, reasoning: bool = False, + reasoning_end: bool = False, ) -> None: meta = dict(msg.metadata or {}) meta["_progress"] = True meta["_tool_hint"] = tool_hint if reasoning: - meta["_reasoning"] = True + meta["_reasoning_delta"] = True + if reasoning_end: + meta["_reasoning_end"] = True if tool_events: meta["_tool_events"] = tool_events await self.bus.publish_outbound( diff --git a/nanobot/agent/runner.py b/nanobot/agent/runner.py index 6b8e5383c..37da63872 100644 --- a/nanobot/agent/runner.py +++ b/nanobot/agent/runner.py @@ -291,6 +291,7 @@ class AgentRunner: response.content = cleaned_content if reasoning_text and not context.streamed_reasoning: await hook.emit_reasoning(reasoning_text) + await hook.emit_reasoning_end() context.streamed_reasoning = True if response.should_execute_tools: @@ -617,6 +618,8 @@ class AgentRunner: and getattr(self.provider, "supports_progress_deltas", False) is True ) + progress_state: dict[str, bool] | None = None + if wants_streaming: async def _stream(delta: str) -> None: if delta: @@ -630,6 +633,7 @@ class AgentRunner: elif wants_progress_streaming: stream_buf = "" think_extractor = IncrementalThinkExtractor() + progress_state = {"reasoning_open": False} async def _stream_progress(delta: str) -> None: nonlocal stream_buf @@ -642,8 +646,12 @@ class AgentRunner: if await think_extractor.feed(stream_buf, 
hook.emit_reasoning): context.streamed_reasoning = True + progress_state["reasoning_open"] = True if incremental: + if progress_state["reasoning_open"]: + await hook.emit_reasoning_end() + progress_state["reasoning_open"] = False context.streamed_content = True await spec.progress_callback(incremental) @@ -654,16 +662,20 @@ class AgentRunner: else: coro = self.provider.chat_with_retry(**kwargs) - if timeout_s is None: - return await coro try: - return await asyncio.wait_for(coro, timeout=timeout_s) + response = ( + await coro if timeout_s is None + else await asyncio.wait_for(coro, timeout=timeout_s) + ) except asyncio.TimeoutError: return LLMResponse( content=f"Error calling LLM: timed out after {timeout_s:g}s", finish_reason="error", error_kind="timeout", ) + if progress_state and progress_state.get("reasoning_open"): + await hook.emit_reasoning_end() + return response async def _request_finalization_retry( self, diff --git a/nanobot/channels/base.py b/nanobot/channels/base.py index c82003d88..257127d5a 100644 --- a/nanobot/channels/base.py +++ b/nanobot/channels/base.py @@ -121,18 +121,53 @@ class BaseChannel(ABC): """ pass - async def send_reasoning(self, msg: OutboundMessage) -> None: - """Surface model reasoning/thinking content. + async def send_reasoning_delta( + self, chat_id: str, delta: str, metadata: dict[str, Any] | None = None + ) -> None: + """Stream a chunk of model reasoning/thinking content. Default is no-op. Channels with a native low-emphasis primitive (Slack context block, Telegram expandable blockquote, Discord subtext, WebUI italic bubble, ...) override to render reasoning - as a subordinate trace. Channels without a suitable affordance - keep this no-op: silently dropping is better than leaking raw - model thoughts as regular conversational messages. + as a subordinate trace that updates in place as the model thinks. 
+ + Streaming contract mirrors :meth:`send_delta`: ``_reasoning_delta`` + is a chunk, ``_reasoning_end`` ends the current reasoning segment, + and stateful implementations should key buffers by ``_stream_id`` + rather than only by ``chat_id``. """ return + async def send_reasoning_end( + self, chat_id: str, metadata: dict[str, Any] | None = None + ) -> None: + """Mark the end of a reasoning stream segment. + + Default is no-op. Channels that buffer ``send_reasoning_delta`` + chunks for in-place updates use this signal to flush and freeze + the rendered group; one-shot channels can ignore it entirely. + """ + return + + async def send_reasoning(self, msg: OutboundMessage) -> None: + """Deliver a complete reasoning block. + + Default implementation reuses the streaming pair so plugins only + need to override the delta/end methods. Equivalent to one delta + with the full content followed immediately by an end marker — + keeps a single rendering path for both streamed and one-shot + reasoning (e.g. DeepSeek-R1's final-response ``reasoning_content``). + """ + if not msg.content: + return + meta = dict(msg.metadata or {}) + meta.setdefault("_reasoning_delta", True) + await self.send_reasoning_delta(msg.chat_id, msg.content, meta) + end_meta = dict(meta) + end_meta.pop("_reasoning_delta", None) + end_meta["_reasoning_end"] = True + await self.send_reasoning_end(msg.chat_id, end_meta) + @property def supports_streaming(self) -> bool: """True when config enables streaming AND this subclass implements send_delta.""" diff --git a/nanobot/channels/manager.py b/nanobot/channels/manager.py index abf9bf043..3a6b6e50f 100644 --- a/nanobot/channels/manager.py +++ b/nanobot/channels/manager.py @@ -283,13 +283,18 @@ class ChannelManager: timeout=1.0 ) - if msg.metadata.get("_reasoning"): - # Reasoning rides its own plugin channel: only delivered when - # the destination channel both opts in (``show_reasoning``) - # and overrides ``send_reasoning``. 
Channels without a - # low-emphasis UI primitive keep the base no-op and the - # content silently drops here rather than leak as a - # conversational reply. + if ( + msg.metadata.get("_reasoning_delta") + or msg.metadata.get("_reasoning_end") + or msg.metadata.get("_reasoning") + ): + # Reasoning rides its own plugin channel: only delivered + # when the destination channel opts in via ``show_reasoning`` + # and overrides the streaming primitives. Channels without + # a low-emphasis UI affordance keep the base no-op and the + # content silently drops here. ``_reasoning`` (one-shot) + # is accepted for backward compatibility with hooks that + # haven't migrated to delta/end yet. channel = self.channels.get(msg.channel) if channel is not None and channel.show_reasoning: await self._send_with_retry(channel, msg) @@ -345,7 +350,14 @@ class ChannelManager: @staticmethod async def _send_once(channel: BaseChannel, msg: OutboundMessage) -> None: """Send one outbound message without retry policy.""" - if msg.metadata.get("_reasoning"): + if msg.metadata.get("_reasoning_end"): + await channel.send_reasoning_end(msg.chat_id, msg.metadata) + elif msg.metadata.get("_reasoning_delta"): + await channel.send_reasoning_delta(msg.chat_id, msg.content, msg.metadata) + elif msg.metadata.get("_reasoning"): + # Back-compat: one-shot reasoning. BaseChannel translates this + # to a single delta + end pair so plugins only implement the + # streaming primitives. 
await channel.send_reasoning(msg) elif msg.metadata.get("_stream_delta") or msg.metadata.get("_stream_end"): await channel.send_delta(msg.chat_id, msg.content, msg.metadata) diff --git a/nanobot/channels/websocket.py b/nanobot/channels/websocket.py index bba68397f..a77c8594f 100644 --- a/nanobot/channels/websocket.py +++ b/nanobot/channels/websocket.py @@ -1487,30 +1487,54 @@ class WebSocketChannel(BaseChannel): for connection in conns: await self._safe_send_to(connection, raw, label=" ") - async def send_reasoning(self, msg: OutboundMessage) -> None: - """Stream model reasoning as a subordinate trace frame. - - Renders as ``kind=reasoning`` alongside the existing ``tool_hint`` / - ``progress`` frames; the WebUI mounts these on the active assistant - bubble rather than as a conversational reply. + async def send_reasoning_delta( + self, + chat_id: str, + delta: str, + metadata: dict[str, Any] | None = None, + ) -> None: + """Push one chunk of model reasoning. Mirrors ``send_delta`` shape so + WebUI receives a stream that opens, updates in place, and closes — + rendered above the active assistant bubble with a shimmer header + until the matching ``reasoning_end`` arrives. 
""" - conns = list(self._subs.get(msg.chat_id, ())) - if not conns: + conns = list(self._subs.get(chat_id, ())) + if not conns or not delta: return - if not msg.content: - return - payload: dict[str, Any] = { - "event": "message", - "chat_id": msg.chat_id, - "text": msg.content, - "kind": "reasoning", + meta = metadata or {} + body: dict[str, Any] = { + "event": "reasoning_delta", + "chat_id": chat_id, + "text": delta, } - if msg.reply_to: - payload["reply_to"] = msg.reply_to - raw = json.dumps(payload, ensure_ascii=False) + stream_id = meta.get("_stream_id") + if stream_id is not None: + body["stream_id"] = stream_id + raw = json.dumps(body, ensure_ascii=False) for connection in conns: await self._safe_send_to(connection, raw, label=" reasoning ") + async def send_reasoning_end( + self, + chat_id: str, + metadata: dict[str, Any] | None = None, + ) -> None: + """Close the current reasoning stream segment for in-place renderers.""" + conns = list(self._subs.get(chat_id, ())) + if not conns: + return + meta = metadata or {} + body: dict[str, Any] = { + "event": "reasoning_end", + "chat_id": chat_id, + } + stream_id = meta.get("_stream_id") + if stream_id is not None: + body["stream_id"] = stream_id + raw = json.dumps(body, ensure_ascii=False) + for connection in conns: + await self._safe_send_to(connection, raw, label=" reasoning_end ") + async def send_delta( self, chat_id: str, diff --git a/tests/agent/test_runner_reasoning.py b/tests/agent/test_runner_reasoning.py index 512f3d2e9..d971e05a1 100644 --- a/tests/agent/test_runner_reasoning.py +++ b/tests/agent/test_runner_reasoning.py @@ -24,11 +24,15 @@ class _RecordingHook(AgentHook): def __init__(self) -> None: super().__init__() self.emitted: list[str] = [] + self.end_calls = 0 async def emit_reasoning(self, reasoning_content: str | None) -> None: if reasoning_content: self.emitted.append(reasoning_content) + async def emit_reasoning_end(self) -> None: + self.end_calls += 1 + @pytest.mark.asyncio async def 
test_runner_preserves_reasoning_fields_in_assistant_history(): @@ -277,3 +281,41 @@ async def test_runner_does_not_double_emit_when_inline_think_already_streamed(): assert result.final_content == "The answer." assert hook.emitted == ["working..."] + assert hook.end_calls >= 1, "reasoning stream must be closed once the answer starts" + + +@pytest.mark.asyncio +async def test_runner_closes_reasoning_stream_after_one_shot_response(): + """A non-streaming response carrying ``reasoning_content`` must emit + both a reasoning delta and an end marker so channels can finalize the + in-place bubble.""" + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock() + + async def chat_with_retry(**kwargs): + return LLMResponse( + content="answer", + reasoning_content="hidden thought", + tool_calls=[], + usage={"prompt_tokens": 5, "completion_tokens": 3}, + ) + + provider.chat_with_retry = chat_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + hook = _RecordingHook() + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "q"}], + tools=tools, + model="test-model", + max_iterations=3, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + hook=hook, + )) + + assert result.final_content == "answer" + assert hook.emitted == ["hidden thought"] + assert hook.end_calls == 1 diff --git a/tests/channels/test_channel_manager_reasoning.py b/tests/channels/test_channel_manager_reasoning.py index 2200f4be2..bc2a640c6 100644 --- a/tests/channels/test_channel_manager_reasoning.py +++ b/tests/channels/test_channel_manager_reasoning.py @@ -1,14 +1,22 @@ """Tests for ChannelManager routing of model reasoning content. -Reasoning is delivered as a separate plugin action (``send_reasoning``) -rather than a metadata flag on a regular outbound. 
The manager routes -``_reasoning`` messages only to channels that opt in via -``channel.show_reasoning``; channels without a low-emphasis UI primitive -keep the base no-op and the content silently drops at dispatch. +Reasoning is delivered through plugin streaming primitives +(``send_reasoning_delta`` / ``send_reasoning_end``) so each channel +controls in-place rendering — mirroring the existing answer ``send_delta`` +/ ``stream_end`` pair. The manager forwards reasoning frames only to +channels that opt in via ``channel.show_reasoning``; plugins without a +low-emphasis UI primitive keep the base no-op and the content silently +drops at dispatch. + +One-shot ``_reasoning`` frames are accepted for back-compat with hooks +that haven't migrated yet — ``BaseChannel.send_reasoning`` expands them +to a single delta + end pair so plugins only implement the streaming +primitives. """ from __future__ import annotations +import asyncio from unittest.mock import AsyncMock import pytest @@ -27,7 +35,8 @@ class _MockChannel(BaseChannel): def __init__(self, config, bus): super().__init__(config, bus) self._send_mock = AsyncMock() - self._send_reasoning_mock = AsyncMock() + self._delta_mock = AsyncMock() + self._end_mock = AsyncMock() async def start(self): # pragma: no cover - not exercised pass @@ -38,8 +47,11 @@ class _MockChannel(BaseChannel): async def send(self, msg): return await self._send_mock(msg) - async def send_reasoning(self, msg): - return await self._send_reasoning_mock(msg) + async def send_reasoning_delta(self, chat_id, delta, metadata=None): + return await self._delta_mock(chat_id, delta, metadata) + + async def send_reasoning_end(self, chat_id, metadata=None): + return await self._end_mock(chat_id, metadata) @pytest.fixture @@ -50,17 +62,52 @@ def manager() -> ChannelManager: @pytest.mark.asyncio -async def test_reasoning_routes_to_send_reasoning_not_send(manager): +async def test_reasoning_delta_routes_to_send_reasoning_delta(manager): channel = 
manager.channels["mock"] msg = OutboundMessage( channel="mock", chat_id="c1", - content="step-by-step thinking", + content="step-by-step", + metadata={"_progress": True, "_reasoning_delta": True, "_stream_id": "r1"}, + ) + await manager._send_once(channel, msg) + channel._delta_mock.assert_awaited_once() + args = channel._delta_mock.await_args.args + assert args[0] == "c1" + assert args[1] == "step-by-step" + channel._send_mock.assert_not_awaited() + channel._end_mock.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_reasoning_end_routes_to_send_reasoning_end(manager): + channel = manager.channels["mock"] + msg = OutboundMessage( + channel="mock", + chat_id="c1", + content="", + metadata={"_progress": True, "_reasoning_end": True, "_stream_id": "r1"}, + ) + await manager._send_once(channel, msg) + channel._end_mock.assert_awaited_once() + channel._delta_mock.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_legacy_one_shot_reasoning_expands_to_delta_plus_end(manager): + """`_reasoning` (no delta/end pair) falls back through `send_reasoning` + which the base class expands to a single delta + end. 
Hooks that haven't + migrated still surface in WebUI as a complete stream segment.""" + channel = manager.channels["mock"] + msg = OutboundMessage( + channel="mock", + chat_id="c1", + content="one-shot reasoning", metadata={"_progress": True, "_reasoning": True}, ) await manager._send_once(channel, msg) - channel._send_reasoning_mock.assert_awaited_once_with(msg) - channel._send_mock.assert_not_awaited() + channel._delta_mock.assert_awaited_once() + channel._end_mock.assert_awaited_once() @pytest.mark.asyncio @@ -71,14 +118,14 @@ async def test_dispatch_drops_reasoning_when_channel_opts_out(manager): channel="mock", chat_id="c1", content="hidden thinking", - metadata={"_progress": True, "_reasoning": True}, + metadata={"_progress": True, "_reasoning_delta": True}, ) await manager.bus.publish_outbound(msg) - pumped = await _pump_one(manager) + await _pump_one(manager) - assert pumped is True - channel._send_reasoning_mock.assert_not_awaited() + channel._delta_mock.assert_not_awaited() + channel._end_mock.assert_not_awaited() channel._send_mock.assert_not_awaited() @@ -86,20 +133,24 @@ async def test_dispatch_drops_reasoning_when_channel_opts_out(manager): async def test_dispatch_delivers_reasoning_when_channel_opts_in(manager): channel = manager.channels["mock"] channel.show_reasoning = True - msg = OutboundMessage( + for chunk in ("first ", "second"): + await manager.bus.publish_outbound(OutboundMessage( + channel="mock", + chat_id="c1", + content=chunk, + metadata={"_progress": True, "_reasoning_delta": True, "_stream_id": "r1"}, + )) + await manager.bus.publish_outbound(OutboundMessage( channel="mock", chat_id="c1", - content="visible thinking", - metadata={"_progress": True, "_reasoning": True}, - ) - await manager.bus.publish_outbound(msg) + content="", + metadata={"_progress": True, "_reasoning_end": True, "_stream_id": "r1"}, + )) - pumped = await _pump_one(manager) + await _pump_one(manager) - assert pumped is True - 
channel._send_reasoning_mock.assert_awaited_once() - delivered = channel._send_reasoning_mock.await_args.args[0] - assert delivered.content == "visible thinking" + assert channel._delta_mock.await_count == 2 + channel._end_mock.assert_awaited_once() @pytest.mark.asyncio @@ -108,21 +159,19 @@ async def test_dispatch_silently_drops_reasoning_for_unknown_channel(manager): channel="ghost", chat_id="c1", content="nobody home", - metadata={"_progress": True, "_reasoning": True}, + metadata={"_progress": True, "_reasoning_delta": True}, ) await manager.bus.publish_outbound(msg) - pumped = await _pump_one(manager) + await _pump_one(manager) - assert pumped is True - # Mock channel must not receive anything destined for a different channel. - manager.channels["mock"]._send_reasoning_mock.assert_not_awaited() + manager.channels["mock"]._delta_mock.assert_not_awaited() manager.channels["mock"]._send_mock.assert_not_awaited() @pytest.mark.asyncio -async def test_base_channel_send_reasoning_is_noop_safe(): - """Plugins that don't override `send_reasoning` must not blow up.""" +async def test_base_channel_reasoning_primitives_are_noop_safe(): + """Plugins that don't override the streaming primitives must not blow up.""" class _Plain(BaseChannel): name = "plain" @@ -138,7 +187,9 @@ async def test_base_channel_send_reasoning_is_noop_safe(): pass channel = _Plain({}, MessageBus()) - # No exception, returns None. + assert await channel.send_reasoning_delta("c", "x") is None + assert await channel.send_reasoning_end("c") is None + # And the one-shot wrapper translates without raising. 
assert await channel.send_reasoning( OutboundMessage(channel="plain", chat_id="c", content="x", metadata={}) ) is None @@ -151,26 +202,21 @@ async def test_reasoning_routing_does_not_consult_send_progress(manager): channel = manager.channels["mock"] channel.send_progress = False channel.show_reasoning = True - msg = OutboundMessage( + await manager.bus.publish_outbound(OutboundMessage( channel="mock", chat_id="c1", content="still surfaces", - metadata={"_progress": True, "_reasoning": True}, - ) - await manager.bus.publish_outbound(msg) + metadata={"_progress": True, "_reasoning_delta": True}, + )) - pumped = await _pump_one(manager) + await _pump_one(manager) - assert pumped is True - channel._send_reasoning_mock.assert_awaited_once() + channel._delta_mock.assert_awaited_once() -async def _pump_one(manager: ChannelManager) -> bool: - """Drive the dispatcher for exactly one message, then cancel.""" - import asyncio - +async def _pump_one(manager: ChannelManager) -> None: + """Drive the dispatcher until the outbound queue drains, then cancel.""" task = asyncio.create_task(manager._dispatch_outbound()) - # Yield control until the queue drains. 
for _ in range(50): await asyncio.sleep(0.01) if manager.bus.outbound.qsize() == 0: @@ -180,4 +226,3 @@ async def _pump_one(manager: ChannelManager) -> bool: await task except asyncio.CancelledError: pass - return True diff --git a/tests/channels/test_websocket_channel.py b/tests/channels/test_websocket_channel.py index 0e682ed0a..f11cb21b4 100644 --- a/tests/channels/test_websocket_channel.py +++ b/tests/channels/test_websocket_channel.py @@ -359,30 +359,44 @@ async def test_send_delta_emits_delta_and_stream_end() -> None: @pytest.mark.asyncio -async def test_send_reasoning_emits_reasoning_kind_frame() -> None: +async def test_send_reasoning_delta_emits_streaming_frame() -> None: bus = MagicMock() channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus) mock_ws = AsyncMock() channel._attach(mock_ws, "chat-1") - await channel.send_reasoning(OutboundMessage( - channel="websocket", - chat_id="chat-1", - content="step-by-step thinking", - metadata={"_progress": True, "_reasoning": True}, - )) + await channel.send_reasoning_delta( + "chat-1", + "step-by-step thinking", + {"_reasoning_delta": True, "_stream_id": "r1"}, + ) mock_ws.send.assert_awaited_once() payload = json.loads(mock_ws.send.await_args.args[0]) - assert payload["event"] == "message" + assert payload["event"] == "reasoning_delta" assert payload["chat_id"] == "chat-1" assert payload["text"] == "step-by-step thinking" - assert payload["kind"] == "reasoning" + assert payload["stream_id"] == "r1" @pytest.mark.asyncio -async def test_send_reasoning_drops_empty_content() -> None: - """Empty reasoning emits nothing — keeps the frontend bubble clean.""" +async def test_send_reasoning_end_emits_close_frame() -> None: + bus = MagicMock() + channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus) + mock_ws = AsyncMock() + channel._attach(mock_ws, "chat-1") + + await channel.send_reasoning_end("chat-1", {"_reasoning_end": True, "_stream_id": "r1"}) + + payload = 
json.loads(mock_ws.send.await_args.args[0]) + assert payload == {"event": "reasoning_end", "chat_id": "chat-1", "stream_id": "r1"} + + +@pytest.mark.asyncio +async def test_send_reasoning_one_shot_expands_to_delta_plus_end() -> None: + """``send_reasoning`` is back-compat for hooks that haven't migrated: + the base implementation must produce one delta and one end so the + WebUI sees the same shape either way.""" bus = MagicMock() channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus) mock_ws = AsyncMock() @@ -391,10 +405,27 @@ async def test_send_reasoning_drops_empty_content() -> None: await channel.send_reasoning(OutboundMessage( channel="websocket", chat_id="chat-1", - content="", + content="thinking", metadata={"_reasoning": True}, )) + assert mock_ws.send.await_count == 2 + first = json.loads(mock_ws.send.call_args_list[0][0][0]) + second = json.loads(mock_ws.send.call_args_list[1][0][0]) + assert first["event"] == "reasoning_delta" + assert first["text"] == "thinking" + assert second["event"] == "reasoning_end" + + +@pytest.mark.asyncio +async def test_send_reasoning_delta_drops_empty_chunks() -> None: + bus = MagicMock() + channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus) + mock_ws = AsyncMock() + channel._attach(mock_ws, "chat-1") + + await channel.send_reasoning_delta("chat-1", "", {"_reasoning_delta": True}) + mock_ws.send.assert_not_awaited() @@ -403,12 +434,8 @@ async def test_send_reasoning_without_subscribers_is_noop() -> None: bus = MagicMock() channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus) - await channel.send_reasoning(OutboundMessage( - channel="websocket", - chat_id="unattached", - content="thinking", - metadata={"_reasoning": True}, - )) + await channel.send_reasoning_delta("unattached", "thinking", None) + await channel.send_reasoning_end("unattached", None) # No subscribers, no exception, no send. 
diff --git a/webui/src/components/MessageBubble.tsx b/webui/src/components/MessageBubble.tsx index 556460824..9002ad500 100644 --- a/webui/src/components/MessageBubble.tsx +++ b/webui/src/components/MessageBubble.tsx @@ -1,4 +1,4 @@ -import { useCallback, useEffect, useMemo, useRef, useState } from "react"; +import { useCallback, useEffect, useRef, useState } from "react"; import { Check, ChevronRight, Copy, FileIcon, ImageIcon, PlaySquare, Sparkles, Wrench } from "lucide-react"; import { useTranslation } from "react-i18next"; @@ -85,12 +85,16 @@ export function MessageBubble({ message }: MessageBubbleProps) { const empty = message.content.trim().length === 0; const media = message.media ?? []; - const reasoning = message.role === "assistant" ? message.reasoning ?? [] : []; + const reasoning = message.role === "assistant" ? message.reasoning ?? "" : ""; + const reasoningStreaming = !!(message.role === "assistant" && message.reasoningStreaming); + const hasReasoning = reasoning.length > 0 || reasoningStreaming; const showAssistantActions = message.role === "assistant" && !message.isStreaming && !empty; return (
- {reasoning.length > 0 ? : null} - {empty && message.isStreaming && reasoning.length === 0 ? ( + {hasReasoning ? ( + + ) : null} + {empty && message.isStreaming && !hasReasoning ? ( ) : empty && message.isStreaming ? null : ( <> @@ -437,33 +441,52 @@ function TraceGroup({ message, animClass }: TraceGroupProps) { } interface ReasoningBubbleProps { - lines: string[]; + text: string; + streaming: boolean; } /** - * Subordinate "thinking" trace shown above an assistant turn. Mirrors the - * CLI's italic dim ``ChevronRight`` row visually; collapsible because - * reasoning from models like DeepSeek-R1 / o-series can run long. Defaults - * to expanded while the answer is still streaming (so the user sees the - * model "thinking out loud"), but the toggle persists across rerenders. + * Subordinate "thinking" trace shown above an assistant turn. + * + * Lifecycle: + * - While ``streaming`` is true (``reasoning_delta`` frames still arriving), + * the bubble defaults to open and the header runs a shimmer + pulse so + * the user sees the model "thinking out loud" in real time. + * - On ``reasoning_end`` the bubble auto-collapses for prose density — + * the user can re-expand to inspect the chain of thought. The local + * toggle persists once the user interacts. */ -function ReasoningBubble({ lines }: ReasoningBubbleProps) { +function ReasoningBubble({ text, streaming }: ReasoningBubbleProps) { const { t } = useTranslation(); - const [open, setOpen] = useState(true); - const text = useMemo(() => lines.join("\n\n"), [lines]); + const [userToggled, setUserToggled] = useState(false); + const [openLocal, setOpenLocal] = useState(true); + const open = userToggled ? openLocal : streaming; + const onToggle = () => { + setUserToggled(true); + setOpenLocal((v) => (userToggled ? !v : !open)); + }; return (
- {open && ( + {open && text.length > 0 && (
= 0; i -= 1) { + const candidate = prev[i]; + if (candidate.role !== "assistant" || candidate.kind === "trace") continue; + const hasAnswer = candidate.content.length > 0; + if (candidate.reasoningStreaming || (!hasAnswer && candidate.reasoning !== undefined)) { + const merged: UIMessage = { + ...candidate, + reasoning: (candidate.reasoning ?? "") + chunk, + reasoningStreaming: true, + }; + return [...prev.slice(0, i), merged, ...prev.slice(i + 1)]; + } + if (!hasAnswer && candidate.isStreaming) { + const merged: UIMessage = { + ...candidate, + reasoning: chunk, + reasoningStreaming: true, + }; + return [...prev.slice(0, i), merged, ...prev.slice(i + 1)]; + } + break; + } + return [ + ...prev, + { + id: crypto.randomUUID(), + role: "assistant", + content: "", + isStreaming: true, + reasoning: chunk, + reasoningStreaming: true, + createdAt: Date.now(), + }, + ]; +} + +/** + * Find the most recent assistant placeholder that an incoming answer + * delta should adopt instead of spawning a parallel row. We look for an + * empty-content assistant turn that is still marked ``isStreaming`` — + * typically created earlier by ``reasoning_delta``. Anything else means + * the model already produced an answer in a previous turn, so the new + * delta belongs in a fresh row. + */ +function findActiveAssistantPlaceholder(prev: UIMessage[]): string | null { + const last = prev[prev.length - 1]; + if (!last) return null; + if (last.role !== "assistant" || last.kind === "trace") return null; + if (last.content.length > 0) return null; + if (!last.isStreaming) return null; + return last.id; +} + +/** + * Close the active reasoning stream segment, if any. Idempotent: a + * ``reasoning_end`` with no preceding deltas is a harmless no-op. 
+ */ +function closeReasoningStream(prev: UIMessage[]): UIMessage[] { + for (let i = prev.length - 1; i >= 0; i -= 1) { + const candidate = prev[i]; + if (!candidate.reasoningStreaming) continue; + const merged: UIMessage = { ...candidate, reasoningStreaming: false }; + return [...prev.slice(0, i), merged, ...prev.slice(i + 1)]; + } + return prev; +} + /** * Subscribe to a chat by ID. Returns the in-memory message list for the chat, * a streaming flag, and a ``send`` function. Initial history must be seeded @@ -122,27 +198,42 @@ export function useNanobotStream( if (ev.event === "delta") { if (suppressStreamUntilTurnEndRef.current) return; - const id = buffer.current?.messageId ?? crypto.randomUUID(); - if (!buffer.current) { - buffer.current = { messageId: id, parts: [] }; - setMessages((prev) => [ - ...prev, - { - id, - role: "assistant", - content: "", - isStreaming: true, - createdAt: Date.now(), - }, - ]); - setIsStreaming(true); - } - buffer.current.parts.push(ev.text); - const combined = buffer.current.parts.join(""); - const targetId = buffer.current.messageId; - setMessages((prev) => - prev.map((m) => (m.id === targetId ? { ...m, content: combined } : m)), - ); + const chunk = ev.text; + setIsStreaming(true); + setMessages((prev) => { + // Reuse an in-flight assistant placeholder (typically created by + // ``reasoning_delta``) so the answer renders below its own + // thinking trace instead of in a parallel row. + const adopted = !buffer.current ? 
findActiveAssistantPlaceholder(prev) : null; + let targetId: string; + let next: UIMessage[]; + if (buffer.current) { + targetId = buffer.current.messageId; + next = prev; + } else if (adopted) { + targetId = adopted; + buffer.current = { messageId: targetId, parts: [] }; + next = prev; + } else { + targetId = crypto.randomUUID(); + buffer.current = { messageId: targetId, parts: [] }; + next = [ + ...prev, + { + id: targetId, + role: "assistant", + content: "", + isStreaming: true, + createdAt: Date.now(), + }, + ]; + } + buffer.current.parts.push(chunk); + const combined = buffer.current.parts.join(""); + return next.map((m) => + m.id === targetId ? { ...m, content: combined, isStreaming: true } : m, + ); + }); return; } @@ -159,6 +250,21 @@ export function useNanobotStream( return; } + if (ev.event === "reasoning_delta") { + if (suppressStreamUntilTurnEndRef.current) return; + const chunk = ev.text; + if (!chunk) return; + setMessages((prev) => attachReasoningChunk(prev, chunk)); + setIsStreaming(true); + return; + } + + if (ev.event === "reasoning_end") { + if (suppressStreamUntilTurnEndRef.current) return; + setMessages((prev) => closeReasoningStream(prev)); + return; + } + if (ev.event === "turn_end") { // Definitive signal that the turn is fully complete. Cancel any // pending debounce timer and stop the loading indicator immediately. @@ -187,37 +293,13 @@ export function useNanobotStream( ) { return; } - // Model reasoning rides its own channel: stash it on the next - // assistant turn so the bubble renders it as a subordinate trace. - // If the assistant message hasn't materialized yet (typical, since - // reasoning fires before tool calls/answers), park it on a sentinel - // pending row that the next assistant message absorbs. + // Back-compat: a legacy ``kind: "reasoning"`` message (no streaming + // partner) is treated as one complete delta + immediate end so the + // bubble renders identically to the streaming path. 
if (ev.kind === "reasoning") { const line = ev.text; if (!line) return; - setMessages((prev) => { - for (let i = prev.length - 1; i >= 0; i -= 1) { - const candidate = prev[i]; - if (candidate.role === "assistant" && candidate.kind !== "trace") { - const merged: UIMessage = { - ...candidate, - reasoning: [...(candidate.reasoning ?? []), line], - }; - return [...prev.slice(0, i), merged, ...prev.slice(i + 1)]; - } - } - return [ - ...prev, - { - id: crypto.randomUUID(), - role: "assistant", - content: "", - isStreaming: true, - reasoning: [line], - createdAt: Date.now(), - }, - ]; - }); + setMessages((prev) => closeReasoningStream(attachReasoningChunk(prev, line))); return; } // Intermediate agent breadcrumbs (tool-call hints, raw progress). diff --git a/webui/src/i18n/locales/en/common.json b/webui/src/i18n/locales/en/common.json index 1f6eb7b54..e82a8f5b7 100644 --- a/webui/src/i18n/locales/en/common.json +++ b/webui/src/i18n/locales/en/common.json @@ -333,6 +333,7 @@ "toolSingle": "Using a tool", "toolMany": "Used {{count}} tools", "reasoning": "Thinking", + "reasoningStreaming": "Thinking…", "imageAttachment": "Image attachment", "copyReply": "Copy reply", "copiedReply": "Copied reply" diff --git a/webui/src/i18n/locales/zh-CN/common.json b/webui/src/i18n/locales/zh-CN/common.json index 662a5f7bd..18d4b5e16 100644 --- a/webui/src/i18n/locales/zh-CN/common.json +++ b/webui/src/i18n/locales/zh-CN/common.json @@ -320,7 +320,8 @@ "assistantTyping": "助手正在输入", "toolSingle": "正在使用工具", "toolMany": "已使用 {{count}} 个工具", - "reasoning": "思考中", + "reasoning": "思考过程", + "reasoningStreaming": "正在思考…", "imageAttachment": "图片附件", "copyReply": "复制回复", "copiedReply": "已复制回复" diff --git a/webui/src/lib/types.ts b/webui/src/lib/types.ts index 0338b75f3..25c317753 100644 --- a/webui/src/lib/types.ts +++ b/webui/src/lib/types.ts @@ -44,10 +44,13 @@ export interface UIMessage { images?: UIImage[]; /** Signed or local UI-renderable media attachments. 
*/ media?: UIMediaAttachment[]; - /** Assistant turn: model reasoning / thinking content collected from - * `kind: "reasoning"` frames. Each entry is one emit cycle, joined with - * blank lines on render. */ - reasoning?: string[]; + /** Assistant turn: accumulated model reasoning / thinking text. Built up + * incrementally from ``reasoning_delta`` frames; finalized when + * ``reasoning_end`` arrives. */ + reasoning?: string; + /** True while ``reasoning_delta`` frames are still arriving for this turn. + * Drives the shimmer header on ``ReasoningBubble``. */ + reasoningStreaming?: boolean; } export interface ChatSummary { @@ -158,6 +161,17 @@ export type InboundEvent = chat_id: string; stream_id?: string; } + | { + event: "reasoning_delta"; + chat_id: string; + text: string; + stream_id?: string; + } + | { + event: "reasoning_end"; + chat_id: string; + stream_id?: string; + } | { event: "runtime_model_updated"; model_name: string; diff --git a/webui/src/tests/message-bubble.test.tsx b/webui/src/tests/message-bubble.test.tsx index 77608b121..29c40a3b8 100644 --- a/webui/src/tests/message-bubble.test.tsx +++ b/webui/src/tests/message-bubble.test.tsx @@ -103,37 +103,41 @@ describe("MessageBubble", () => { expect(container.querySelector("video[controls]")).toBeInTheDocument(); }); - it("surfaces reasoning content above the assistant answer when provided", () => { + it("auto-expands the reasoning trace while streaming with a shimmer header", () => { const message: UIMessage = { - id: "a-reasoning", + id: "a-reasoning-streaming", + role: "assistant", + content: "", + createdAt: Date.now(), + reasoning: "Step 1: parse intent. 
Step 2: compute.", + reasoningStreaming: true, + }; + + const { container } = render(); + + expect(screen.getByText("Thinking…")).toBeInTheDocument(); + expect(screen.getByText(/Step 1: parse intent\./)).toBeInTheDocument(); + expect(container.querySelector(".reasoning-shimmer")).toBeInTheDocument(); + }); + + it("collapses the reasoning section by default once streaming ends", () => { + const message: UIMessage = { + id: "a-reasoning-done", role: "assistant", content: "The answer is 42.", createdAt: Date.now(), - reasoning: ["Step 1: parse intent.", "Step 2: compute."], + reasoning: "hidden until expanded", + reasoningStreaming: false, }; render(); expect(screen.getByText("Thinking")).toBeInTheDocument(); - expect(screen.getByText(/Step 1: parse intent\./)).toBeInTheDocument(); - expect(screen.getByText(/Step 2: compute\./)).toBeInTheDocument(); expect(screen.getByText("The answer is 42.")).toBeInTheDocument(); - }); + expect(screen.queryByText("hidden until expanded")).not.toBeInTheDocument(); - it("collapses the reasoning section when toggled", () => { - const message: UIMessage = { - id: "a-reasoning-collapse", - role: "assistant", - content: "done", - createdAt: Date.now(), - reasoning: ["hidden after toggle"], - }; - - render(); - - expect(screen.getByText("hidden after toggle")).toBeInTheDocument(); fireEvent.click(screen.getByRole("button", { name: /thinking/i })); - expect(screen.queryByText("hidden after toggle")).not.toBeInTheDocument(); + expect(screen.getByText("hidden until expanded")).toBeInTheDocument(); }); it("renders assistant image media as a larger generated result", () => { diff --git a/webui/src/tests/useNanobotStream.test.tsx b/webui/src/tests/useNanobotStream.test.tsx index 7fb94063c..145d36c1c 100644 --- a/webui/src/tests/useNanobotStream.test.tsx +++ b/webui/src/tests/useNanobotStream.test.tsx @@ -113,7 +113,7 @@ describe("useNanobotStream", () => { expect(result.current.messages[1].kind).toBeUndefined(); }); - it("parks reasoning frames 
on a placeholder assistant message until the answer arrives", () => { + it("accumulates reasoning_delta chunks on a placeholder until reasoning_end", () => { const fake = fakeClient(); const { result } = renderHook(() => useNanobotStream("chat-r", EMPTY_MESSAGES), { wrapper: wrap(fake.client), @@ -121,28 +121,31 @@ describe("useNanobotStream", () => { act(() => { fake.emit("chat-r", { - event: "message", + event: "reasoning_delta", chat_id: "chat-r", - text: "Let me think step by step.", - kind: "reasoning", + text: "Let me think ", }); fake.emit("chat-r", { - event: "message", + event: "reasoning_delta", chat_id: "chat-r", - text: "First, decompose the request.", - kind: "reasoning", + text: "step by step.", }); }); expect(result.current.messages).toHaveLength(1); expect(result.current.messages[0].role).toBe("assistant"); - expect(result.current.messages[0].reasoning).toEqual([ - "Let me think step by step.", - "First, decompose the request.", - ]); + expect(result.current.messages[0].reasoning).toBe("Let me think step by step."); + expect(result.current.messages[0].reasoningStreaming).toBe(true); + + act(() => { + fake.emit("chat-r", { event: "reasoning_end", chat_id: "chat-r" }); + }); + + expect(result.current.messages[0].reasoningStreaming).toBe(false); + expect(result.current.messages[0].reasoning).toBe("Let me think step by step."); }); - it("attaches reasoning to the latest assistant turn rather than spawning a new one", () => { + it("absorbs a streaming reasoning placeholder into the answer turn that follows", () => { const fake = fakeClient(); const { result } = renderHook(() => useNanobotStream("chat-r2", EMPTY_MESSAGES), { wrapper: wrap(fake.client), @@ -150,24 +153,26 @@ describe("useNanobotStream", () => { act(() => { fake.emit("chat-r2", { - event: "message", + event: "reasoning_delta", + chat_id: "chat-r2", + text: "Plan first.", + }); + fake.emit("chat-r2", { event: "reasoning_end", chat_id: "chat-r2" }); + fake.emit("chat-r2", { + event: "delta", 
chat_id: "chat-r2", text: "The answer is 42.", }); - fake.emit("chat-r2", { - event: "message", - chat_id: "chat-r2", - text: "Reasoning surfaced post-hoc.", - kind: "reasoning", - }); + fake.emit("chat-r2", { event: "stream_end", chat_id: "chat-r2" }); }); expect(result.current.messages).toHaveLength(1); expect(result.current.messages[0].content).toBe("The answer is 42."); - expect(result.current.messages[0].reasoning).toEqual(["Reasoning surfaced post-hoc."]); + expect(result.current.messages[0].reasoning).toBe("Plan first."); + expect(result.current.messages[0].reasoningStreaming).toBe(false); }); - it("ignores empty reasoning frames", () => { + it("ignores empty reasoning_delta frames", () => { const fake = fakeClient(); const { result } = renderHook(() => useNanobotStream("chat-r3", EMPTY_MESSAGES), { wrapper: wrap(fake.client), @@ -175,16 +180,35 @@ describe("useNanobotStream", () => { act(() => { fake.emit("chat-r3", { - event: "message", + event: "reasoning_delta", chat_id: "chat-r3", text: "", - kind: "reasoning", }); }); expect(result.current.messages).toHaveLength(0); }); + it("treats legacy kind=reasoning messages as a complete delta + end pair", () => { + const fake = fakeClient(); + const { result } = renderHook(() => useNanobotStream("chat-r4", EMPTY_MESSAGES), { + wrapper: wrap(fake.client), + }); + + act(() => { + fake.emit("chat-r4", { + event: "message", + chat_id: "chat-r4", + text: "one-shot reasoning", + kind: "reasoning", + }); + }); + + expect(result.current.messages).toHaveLength(1); + expect(result.current.messages[0].reasoning).toBe("one-shot reasoning"); + expect(result.current.messages[0].reasoningStreaming).toBe(false); + }); + it("attaches assistant media_urls to complete messages", () => { const fake = fakeClient(); const { result } = renderHook(() => useNanobotStream("chat-m", EMPTY_MESSAGES), { From 9829cf66d2530d3eb41722cf29404824557fa589 Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Wed, 13 May 2026 07:20:36 +0000 Subject: 
[PATCH 07/17] fix(webui): keep late reasoning attached above the answer Some providers only surface structured `reasoning_content` after answer text has already streamed. The WebUI was treating those late `reasoning_delta` frames as a fresh assistant placeholder, so the Thinking bubble rendered below the already-visible answer. Attach late reasoning back to the active assistant turn instead. The bubble still renders above the message content, preserving the expected Thinking -> answer order even when the provider protocol delivers the reasoning post-hoc. Added a regression test for answer-first followed by reasoning_delta/reasoning_end. Co-authored-by: Cursor --- webui/src/hooks/useNanobotStream.ts | 16 +++++++++---- webui/src/tests/useNanobotStream.test.tsx | 29 +++++++++++++++++++++++ 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/webui/src/hooks/useNanobotStream.ts b/webui/src/hooks/useNanobotStream.ts index 60736b393..8e83b9eb2 100644 --- a/webui/src/hooks/useNanobotStream.ts +++ b/webui/src/hooks/useNanobotStream.ts @@ -21,17 +21,23 @@ interface StreamBuffer { /** * Append a reasoning chunk to the last open reasoning stream in ``prev``. * - * Lookup rule: find the most recent assistant turn that is either still - * streaming reasoning (``reasoningStreaming``) or has no answer text yet. - * Anything else starts a fresh streaming placeholder so a new turn's - * reasoning never bleeds into the previous answer. + * Lookup rule: prefer the most recent assistant turn in the active UI tail. + * Most providers emit reasoning before answer text, but some only expose + * ``reasoning_content`` after the answer stream completes. In that post-hoc + * case the reasoning still belongs to the same assistant turn and must render + * above the answer, not as a new row below it. 
*/ function attachReasoningChunk(prev: UIMessage[], chunk: string): UIMessage[] { for (let i = prev.length - 1; i >= 0; i -= 1) { const candidate = prev[i]; if (candidate.role !== "assistant" || candidate.kind === "trace") continue; const hasAnswer = candidate.content.length > 0; - if (candidate.reasoningStreaming || (!hasAnswer && candidate.reasoning !== undefined)) { + if ( + candidate.reasoningStreaming + || candidate.reasoning !== undefined + || hasAnswer + || candidate.isStreaming + ) { const merged: UIMessage = { ...candidate, reasoning: (candidate.reasoning ?? "") + chunk, diff --git a/webui/src/tests/useNanobotStream.test.tsx b/webui/src/tests/useNanobotStream.test.tsx index 145d36c1c..f621437fd 100644 --- a/webui/src/tests/useNanobotStream.test.tsx +++ b/webui/src/tests/useNanobotStream.test.tsx @@ -209,6 +209,35 @@ describe("useNanobotStream", () => { expect(result.current.messages[0].reasoningStreaming).toBe(false); }); + it("attaches post-hoc reasoning to the same assistant turn above the answer", () => { + const fake = fakeClient(); + const { result } = renderHook(() => useNanobotStream("chat-r5", EMPTY_MESSAGES), { + wrapper: wrap(fake.client), + }); + + act(() => { + fake.emit("chat-r5", { + event: "delta", + chat_id: "chat-r5", + text: "hi~", + }); + fake.emit("chat-r5", { event: "stream_end", chat_id: "chat-r5" }); + fake.emit("chat-r5", { + event: "reasoning_delta", + chat_id: "chat-r5", + text: "This reasoning arrived after the answer stream.", + }); + fake.emit("chat-r5", { event: "reasoning_end", chat_id: "chat-r5" }); + }); + + expect(result.current.messages).toHaveLength(1); + expect(result.current.messages[0].content).toBe("hi~"); + expect(result.current.messages[0].reasoning).toBe( + "This reasoning arrived after the answer stream.", + ); + expect(result.current.messages[0].reasoningStreaming).toBe(false); + }); + it("attaches assistant media_urls to complete messages", () => { const fake = fakeClient(); const { result } = renderHook(() => 
useNanobotStream("chat-m", EMPTY_MESSAGES), { From 0033a8a1852df30b3fdb3c8f7f093659e8b443a3 Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Wed, 13 May 2026 07:28:54 +0000 Subject: [PATCH 08/17] fix(webui): keep reasoning scoped to the current user turn The post-hoc reasoning fix allowed late reasoning frames to attach back to the nearest assistant message, but the scan crossed a newer user message. That made the next turn's Thinking bubble render above the previous assistant reply. Treat the latest user message as a hard boundary: reasoning after it must start a new assistant placeholder and can no longer attach to earlier assistant turns. Add a regression covering previous assistant -> new user -> reasoning_delta. Co-authored-by: Cursor --- webui/src/hooks/useNanobotStream.ts | 3 ++ webui/src/tests/useNanobotStream.test.tsx | 38 +++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/webui/src/hooks/useNanobotStream.ts b/webui/src/hooks/useNanobotStream.ts index 8e83b9eb2..d2a229730 100644 --- a/webui/src/hooks/useNanobotStream.ts +++ b/webui/src/hooks/useNanobotStream.ts @@ -30,6 +30,9 @@ interface StreamBuffer { function attachReasoningChunk(prev: UIMessage[], chunk: string): UIMessage[] { for (let i = prev.length - 1; i >= 0; i -= 1) { const candidate = prev[i]; + // A user turn is a hard boundary: reasoning after it belongs to the new + // assistant turn, never to an earlier assistant reply. 
+ if (candidate.role === "user") break; if (candidate.role !== "assistant" || candidate.kind === "trace") continue; const hasAnswer = candidate.content.length > 0; if ( diff --git a/webui/src/tests/useNanobotStream.test.tsx b/webui/src/tests/useNanobotStream.test.tsx index f621437fd..41e6ca3cf 100644 --- a/webui/src/tests/useNanobotStream.test.tsx +++ b/webui/src/tests/useNanobotStream.test.tsx @@ -238,6 +238,44 @@ describe("useNanobotStream", () => { expect(result.current.messages[0].reasoningStreaming).toBe(false); }); + it("does not attach a new turn's reasoning across the latest user boundary", () => { + const fake = fakeClient(); + const initialMessages = [ + { + id: "a-prev", + role: "assistant" as const, + content: "Previous answer.", + reasoning: "Previous thought.", + createdAt: Date.now(), + }, + { + id: "u-next", + role: "user" as const, + content: "Next question", + createdAt: Date.now(), + }, + ]; + const { result } = renderHook( + () => useNanobotStream("chat-r6", initialMessages), + { wrapper: wrap(fake.client) }, + ); + + act(() => { + fake.emit("chat-r6", { + event: "reasoning_delta", + chat_id: "chat-r6", + text: "New turn thinking.", + }); + }); + + expect(result.current.messages).toHaveLength(3); + expect(result.current.messages[0].reasoning).toBe("Previous thought."); + expect(result.current.messages[2].role).toBe("assistant"); + expect(result.current.messages[2].content).toBe(""); + expect(result.current.messages[2].reasoning).toBe("New turn thinking."); + expect(result.current.messages[2].reasoningStreaming).toBe(true); + }); + it("attaches assistant media_urls to complete messages", () => { const fake = fakeClient(); const { result } = renderHook(() => useNanobotStream("chat-m", EMPTY_MESSAGES), { From 278affc25e461b6235708798ab9dd5ec946ae064 Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Wed, 13 May 2026 07:33:52 +0000 Subject: [PATCH 09/17] fix(webui): hydrate reasoning and tool traces from history Live reasoning/tool frames were rendering 
correctly, but refreshing WebUI replayed only role/content/media from `/api/sessions/:key/messages`. Assistant `reasoning_content` / `thinking_blocks` and `tool_calls` were already persisted by the backend and returned by the history endpoint, but useSessionHistory discarded them. Hydrate persisted assistant reasoning into `UIMessage.reasoning` and reconstruct assistant tool calls as `kind: "trace"` rows so the replayed thread keeps the same Thinking bubble and Used tools block as the live stream. Tool result rows remain hidden from the conversation view to avoid replaying raw tool output as chat text. Adds regression coverage for both persisted reasoning and historical tool call trace hydration. Co-authored-by: Cursor --- webui/src/hooks/useSessions.ts | 66 +++++++++++++++++++-- webui/src/lib/api.ts | 2 + webui/src/tests/useSessions.test.tsx | 86 ++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 5 deletions(-) diff --git a/webui/src/hooks/useSessions.ts b/webui/src/hooks/useSessions.ts index e05e16a20..d1be437b7 100644 --- a/webui/src/hooks/useSessions.ts +++ b/webui/src/hooks/useSessions.ts @@ -14,6 +14,48 @@ import type { ChatSummary, UIMessage } from "@/lib/types"; const EMPTY_MESSAGES: UIMessage[] = []; +type HistoryMessage = Awaited>["messages"][number]; + +function reasoningFromHistory(message: HistoryMessage): string | undefined { + if (typeof message.reasoning_content === "string" && message.reasoning_content.trim()) { + return message.reasoning_content; + } + if (!Array.isArray(message.thinking_blocks)) return undefined; + const parts = message.thinking_blocks + .map((block) => { + if (!block || typeof block !== "object") return ""; + const thinking = (block as { thinking?: unknown }).thinking; + return typeof thinking === "string" ? thinking.trim() : ""; + }) + .filter(Boolean); + return parts.length > 0 ? 
parts.join("\n\n") : undefined; +} + +function formatToolCallTrace(call: unknown): string | null { + if (!call || typeof call !== "object") return null; + const item = call as { + name?: unknown; + function?: { name?: unknown; arguments?: unknown }; + }; + const name = + typeof item.function?.name === "string" + ? item.function.name + : typeof item.name === "string" + ? item.name + : ""; + if (!name) return null; + const args = item.function?.arguments; + if (typeof args === "string" && args.trim()) return `${name}(${args})`; + return `${name}()`; +} + +function toolTracesFromHistory(message: HistoryMessage): string[] { + if (!Array.isArray(message.tool_calls)) return []; + return message.tool_calls + .map(formatToolCallTrace) + .filter((trace): trace is string => !!trace); +} + /** Sidebar state: fetches the full session list and exposes create / delete actions. */ export function useSessions(): { sessions: ChatSummary[]; @@ -143,14 +185,28 @@ export function useSessionHistory(key: string | null): { m.role === "user" && media?.every((item) => item.kind === "image") ? media.map((item) => ({ url: item.url, name: item.name })) : undefined; + const row: UIMessage = { + id: `hist-${idx}`, + role: m.role, + content: m.content, + createdAt: m.timestamp ? Date.parse(m.timestamp) : Date.now(), + ...(images ? { images } : {}), + ...(media ? { media } : {}), + ...(m.role === "assistant" && reasoningFromHistory(m) + ? { reasoning: reasoningFromHistory(m), reasoningStreaming: false } + : {}), + }; + const traces = m.role === "assistant" ? toolTracesFromHistory(m) : []; + if (traces.length === 0) return [row]; return [ + ...(row.content.trim() || row.reasoning || row.media?.length ? [row] : []), { - id: `hist-${idx}`, - role: m.role, - content: m.content, + id: `hist-${idx}-tools`, + role: "tool" as const, + kind: "trace" as const, + content: traces[traces.length - 1], + traces, createdAt: m.timestamp ? Date.parse(m.timestamp) : Date.now(), - ...(images ? 
{ images } : {}), - ...(media ? { media } : {}), }, ]; }); diff --git a/webui/src/lib/api.ts b/webui/src/lib/api.ts index 23a8c2a67..c27ebd3d6 100644 --- a/webui/src/lib/api.ts +++ b/webui/src/lib/api.ts @@ -89,6 +89,8 @@ export async function fetchSessionMessages( content: string; timestamp?: string; tool_calls?: unknown; + reasoning_content?: string | null; + thinking_blocks?: unknown; tool_call_id?: string; name?: string; /** Present on ``user`` turns that attached images. Paths have already diff --git a/webui/src/tests/useSessions.test.tsx b/webui/src/tests/useSessions.test.tsx index 4805c6567..988b97252 100644 --- a/webui/src/tests/useSessions.test.tsx +++ b/webui/src/tests/useSessions.test.tsx @@ -170,6 +170,92 @@ describe("useSessions", () => { ]); }); + it("hydrates persisted assistant reasoning into the replayed message", async () => { + vi.mocked(api.fetchSessionMessages).mockResolvedValue({ + key: "websocket:chat-reasoning", + created_at: "2026-04-20T10:00:00Z", + updated_at: "2026-04-20T10:05:00Z", + messages: [ + { + role: "assistant", + content: "final answer", + timestamp: "2026-04-20T10:00:01Z", + reasoning_content: "hidden but persisted reasoning", + }, + ], + }); + + const { result } = renderHook(() => useSessionHistory("websocket:chat-reasoning"), { + wrapper: wrap(fakeClient()), + }); + + await waitFor(() => expect(result.current.loading).toBe(false)); + + expect(result.current.messages).toHaveLength(1); + expect(result.current.messages[0].role).toBe("assistant"); + expect(result.current.messages[0].content).toBe("final answer"); + expect(result.current.messages[0].reasoning).toBe("hidden but persisted reasoning"); + expect(result.current.messages[0].reasoningStreaming).toBe(false); + }); + + it("hydrates historical assistant tool calls into a replay trace row", async () => { + vi.mocked(api.fetchSessionMessages).mockResolvedValue({ + key: "websocket:chat-tools", + created_at: "2026-04-20T10:00:00Z", + updated_at: "2026-04-20T10:05:00Z", + 
messages: [ + { + role: "user", + content: "research this", + timestamp: "2026-04-20T10:00:00Z", + }, + { + role: "assistant", + content: "", + timestamp: "2026-04-20T10:00:01Z", + tool_calls: [ + { + id: "call-1", + type: "function", + function: { name: "web_search", arguments: "{\"query\":\"agents\"}" }, + }, + { + id: "call-2", + type: "function", + function: { name: "web_fetch", arguments: "{\"url\":\"https://example.com\"}" }, + }, + ], + }, + { + role: "tool", + content: "tool output that should not render directly", + timestamp: "2026-04-20T10:00:02Z", + tool_call_id: "call-1", + }, + { + role: "assistant", + content: "summary", + timestamp: "2026-04-20T10:00:03Z", + }, + ], + }); + + const { result } = renderHook(() => useSessionHistory("websocket:chat-tools"), { + wrapper: wrap(fakeClient()), + }); + + await waitFor(() => expect(result.current.loading).toBe(false)); + + expect(result.current.messages.map((m) => m.role)).toEqual(["user", "tool", "assistant"]); + const trace = result.current.messages[1]; + expect(trace.kind).toBe("trace"); + expect(trace.traces).toEqual([ + "web_search({\"query\":\"agents\"})", + "web_fetch({\"url\":\"https://example.com\"})", + ]); + expect(result.current.messages[2].content).toBe("summary"); + }); + it("flags history with trailing assistant tool calls as still pending", async () => { vi.mocked(api.fetchSessionMessages).mockResolvedValue({ key: "websocket:chat-pending", From 521aaa5ecfb1a65f1f7d203ad1913575734028d1 Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Wed, 13 May 2026 07:49:44 +0000 Subject: [PATCH 10/17] fix(webui): split reasoning at tool trace boundaries Live rendering merged reasoning chunks by scanning backward to the latest assistant row. That fixed late reasoning, but the scan skipped trace rows, so reasoning after a tool call crossed the Used tools block and attached to the previous assistant iteration. Refresh looked correct because persisted history reconstructs assistant/tool boundaries. 
Treat trace rows as hard phase boundaries, just like user messages. A reasoning_delta after Used tools now starts a fresh assistant placeholder, so live rendering matches replay: Thinking -> Used tools -> Thinking -> Used tools / answer. Add a regression for reasoning_delta -> reasoning_end -> tool_hint -> reasoning_delta. Co-authored-by: Cursor --- webui/src/hooks/useNanobotStream.ts | 6 +++- webui/src/tests/useNanobotStream.test.tsx | 39 +++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/webui/src/hooks/useNanobotStream.ts b/webui/src/hooks/useNanobotStream.ts index d2a229730..10f1e2400 100644 --- a/webui/src/hooks/useNanobotStream.ts +++ b/webui/src/hooks/useNanobotStream.ts @@ -33,7 +33,11 @@ function attachReasoningChunk(prev: UIMessage[], chunk: string): UIMessage[] { // A user turn is a hard boundary: reasoning after it belongs to the new // assistant turn, never to an earlier assistant reply. if (candidate.role === "user") break; - if (candidate.role !== "assistant" || candidate.kind === "trace") continue; + // A trace row (e.g. Used tools) is also a phase boundary. Reasoning after + // tools belongs to the next assistant iteration, not the assistant turn + // that produced those tool calls. 
+ if (candidate.kind === "trace") break; + if (candidate.role !== "assistant") continue; const hasAnswer = candidate.content.length > 0; if ( candidate.reasoningStreaming diff --git a/webui/src/tests/useNanobotStream.test.tsx b/webui/src/tests/useNanobotStream.test.tsx index 41e6ca3cf..0aa069cfb 100644 --- a/webui/src/tests/useNanobotStream.test.tsx +++ b/webui/src/tests/useNanobotStream.test.tsx @@ -276,6 +276,45 @@ describe("useNanobotStream", () => { expect(result.current.messages[2].reasoningStreaming).toBe(true); }); + it("does not attach reasoning across a tool trace boundary", () => { + const fake = fakeClient(); + const { result } = renderHook(() => useNanobotStream("chat-r7", EMPTY_MESSAGES), { + wrapper: wrap(fake.client), + }); + + act(() => { + fake.emit("chat-r7", { + event: "reasoning_delta", + chat_id: "chat-r7", + text: "First reasoning.", + }); + fake.emit("chat-r7", { event: "reasoning_end", chat_id: "chat-r7" }); + fake.emit("chat-r7", { + event: "message", + chat_id: "chat-r7", + text: "web_search({\"query\":\"OpenClaw\"})", + kind: "tool_hint", + }); + fake.emit("chat-r7", { + event: "reasoning_delta", + chat_id: "chat-r7", + text: "Second reasoning.", + }); + }); + + expect(result.current.messages).toHaveLength(3); + expect(result.current.messages.map((m) => m.kind ?? 
"message")).toEqual([ + "message", + "trace", + "message", + ]); + expect(result.current.messages[0].reasoning).toBe("First reasoning."); + expect(result.current.messages[1].traces).toEqual([ + "web_search({\"query\":\"OpenClaw\"})", + ]); + expect(result.current.messages[2].reasoning).toBe("Second reasoning."); + }); + it("attaches assistant media_urls to complete messages", () => { const fake = fakeClient(); const { result } = renderHook(() => useNanobotStream("chat-m", EMPTY_MESSAGES), { From c7ec5d3b75bac7cc667abb702d808c901843e865 Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Wed, 13 May 2026 07:58:24 +0000 Subject: [PATCH 11/17] fix(webui): align thinking and tool trace affordances Tool trace groups are supporting details, so default them to collapsed. Match the Thinking bubble's expanded body to the tool trace affordance by using the same grouped header and animated fade/slide body treatment. Update MessageBubble tests to assert tool traces start collapsed and expand on click. Co-authored-by: Cursor --- webui/src/components/MessageBubble.tsx | 11 ++++++----- webui/src/tests/message-bubble.test.tsx | 7 ++++--- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/webui/src/components/MessageBubble.tsx b/webui/src/components/MessageBubble.tsx index 9002ad500..abf85f663 100644 --- a/webui/src/components/MessageBubble.tsx +++ b/webui/src/components/MessageBubble.tsx @@ -386,14 +386,14 @@ interface TraceGroupProps { /** * Collapsible group of tool-call / progress breadcrumbs. Defaults to - * expanded for discoverability; a single click on the header folds the - * group down to a one-line summary so it never dominates the thread. + * collapsed because tool traces are supporting evidence, not the answer. + * A single click expands the exact calls when the user wants details. */ function TraceGroup({ message, animClass }: TraceGroupProps) { const { t } = useTranslation(); const lines = message.traces ?? 
[message.content]; const count = lines.length; - const [open, setOpen] = useState(true); + const [open, setOpen] = useState(false); return (
+ +
, + ); + + fireEvent.change(screen.getByLabelText("Message input"), { + target: { value: "/" }, + }); + expect(screen.getByRole("listbox", { name: "Slash commands" })).toBeInTheDocument(); + + fireEvent.pointerDown(screen.getByRole("button", { name: "outside" })); + + expect(screen.queryByRole("listbox", { name: "Slash commands" })).not.toBeInTheDocument(); + }); + it("sends image generation mode with automatic aspect ratio", () => { const onSend = vi.fn(); render( diff --git a/webui/src/tests/thread-shell.test.tsx b/webui/src/tests/thread-shell.test.tsx index 8dd999d6b..f9bf7db0c 100644 --- a/webui/src/tests/thread-shell.test.tsx +++ b/webui/src/tests/thread-shell.test.tsx @@ -573,7 +573,7 @@ describe("ThreadShell", () => { await waitFor(() => expect(screen.getByText("live assistant reply")).toBeInTheDocument()); }); - it("does not open slash commands on the blank welcome page", async () => { + it("opens slash commands on the blank welcome page", async () => { const client = makeClient(); vi.stubGlobal( "fetch", @@ -583,10 +583,11 @@ describe("ThreadShell", () => { return httpJson({ commands: [ { - command: "/stop", - title: "Stop current task", - description: "Cancel the active agent turn.", - icon: "square", + command: "/history", + title: "Show conversation history", + description: "Print the last N persisted messages.", + icon: "history", + arg_hint: "[n]", }, ], }); @@ -622,7 +623,8 @@ describe("ThreadShell", () => { target: { value: "/" }, }); - expect(screen.queryByRole("listbox", { name: "Slash commands" })).not.toBeInTheDocument(); + expect(screen.getByRole("listbox", { name: "Slash commands" })).toBeInTheDocument(); + expect(screen.getByRole("option", { name: /\/history/i })).toBeInTheDocument(); }); it("switches welcome quick actions when image mode is enabled", async () => { From 3fab7362624af4bde6ace8ed208e1a2142d0915d Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Wed, 13 May 2026 09:13:16 +0000 Subject: [PATCH 15/17] fix(cli): keep trace output 
under assistant header Co-authored-by: Cursor --- nanobot/cli/commands.py | 25 +++++++++++-- nanobot/cli/stream.py | 47 +++++++++++++++++++----- tests/cli/test_cli_input.py | 26 +++++++++++++ tests/cli/test_interactive_retry_wait.py | 19 ++++++++++ 4 files changed, 103 insertions(+), 14 deletions(-) diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index dd23cb620..e02653bf9 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -176,13 +176,15 @@ def _print_agent_response( response: str, render_markdown: bool, metadata: dict | None = None, + show_header: bool = True, ) -> None: """Render assistant response with consistent terminal styling.""" console = _make_console() content = response or "" body = _response_renderable(content, render_markdown, metadata) - console.print() - console.print(f"[cyan]{__logo__} nanobot[/cyan]") + if show_header: + console.print() + console.print(f"[cyan]{__logo__} nanobot[/cyan]") console.print(body) console.print() @@ -235,6 +237,8 @@ def _print_cli_progress_line(text: str, thinking: ThinkingSpinner | None, render target = renderer.console if renderer else console pause = renderer.pause_spinner() if renderer else (thinking.pause() if thinking else nullcontext()) with pause: + if renderer: + renderer.ensure_header() target.print(f" [dim]↳ {text}[/dim]") @@ -245,6 +249,8 @@ def _print_cli_reasoning(text: str, thinking: ThinkingSpinner | None, renderer: target = renderer.console if renderer else console pause = renderer.pause_spinner() if renderer else (thinking.pause() if thinking else nullcontext()) with pause: + if renderer: + renderer.ensure_header() target.print(f"[dim italic]✻ {text}[/dim italic]") @@ -254,6 +260,7 @@ async def _print_interactive_progress_line(text: str, thinking: ThinkingSpinner return if renderer: with renderer.pause_spinner(): + renderer.ensure_header() renderer.console.print(f" [dim]↳ {text}[/dim]") else: with thinking.pause() if thinking else nullcontext(): @@ -275,7 +282,7 @@ 
async def _maybe_print_interactive_progress( return False is_tool_hint = metadata.get("_tool_hint", False) - is_reasoning = metadata.get("_reasoning", False) + is_reasoning = metadata.get("_reasoning", False) or metadata.get("_reasoning_delta", False) if is_reasoning: if channels_config and not channels_config.show_reasoning: return True @@ -1118,10 +1125,14 @@ def agent( ) if not renderer.streamed: await renderer.close() + print_kwargs: dict[str, Any] = {} + if renderer.header_printed: + print_kwargs["show_header"] = False _print_agent_response( response.content if response else "", render_markdown=markdown, metadata=response.metadata if response else None, + **print_kwargs, ) await agent_loop.close_mcp() @@ -1246,8 +1257,14 @@ def agent( if content and not meta.get("_streamed"): if renderer: await renderer.close() + print_kwargs: dict[str, Any] = {} + if renderer and renderer.header_printed: + print_kwargs["show_header"] = False _print_agent_response( - content, render_markdown=markdown, metadata=meta, + content, + render_markdown=markdown, + metadata=meta, + **print_kwargs, ) elif renderer and not renderer.streamed: await renderer.close() diff --git a/nanobot/cli/stream.py b/nanobot/cli/stream.py index 64cb4ed78..382ae9aac 100644 --- a/nanobot/cli/stream.py +++ b/nanobot/cli/stream.py @@ -10,6 +10,7 @@ that plagued earlier approaches. 
from __future__ import annotations import sys +from contextlib import contextmanager, nullcontext from rich.console import Console from rich.live import Live @@ -93,6 +94,7 @@ class StreamRenderer: self._console = _make_console() self._live: Live | None = None self._spinner: ThinkingSpinner | None = None + self._header_printed = False self._start_spinner() def _renderable(self): @@ -122,12 +124,41 @@ class StreamRenderer: """Expose the Live's console so external print functions can use it.""" return self._console + @property + def header_printed(self) -> bool: + """Whether this turn has already opened the assistant output block.""" + return self._header_printed + + def ensure_header(self) -> None: + """Print the assistant header once, before trace or answer content.""" + if self._header_printed: + return + self._stop_spinner() + self._console.print() + header = f"{self._bot_icon} {self._bot_name}" if self._bot_icon else self._bot_name + self._console.print(f"[cyan]{header}[/cyan]") + self._header_printed = True + def pause_spinner(self): - """Context manager: temporarily stop spinner for clean output.""" - if self._spinner: - return self._spinner.pause() - from contextlib import nullcontext - return nullcontext() + """Context manager: temporarily stop transient output for clean trace lines.""" + @contextmanager + def _pause(): + live_was_active = self._live is not None + if self._live: + # Trace/reasoning can arrive after answer streaming has started. + # Stop the transient Live view first so it does not leak a raw + # partial markdown frame before the trace line. + self._live.stop() + self._live = None + with self._spinner.pause() if self._spinner else nullcontext(): + yield + # If more answer deltas arrive after the trace, on_delta() will + # create a fresh Live using the existing buffer. If no deltas arrive, + # on_end() prints the final buffered answer once. 
+ if live_was_active: + return + + return _pause() async def on_delta(self, delta: str) -> None: self.streamed = True @@ -135,10 +166,7 @@ class StreamRenderer: if self._live is None: if not self._buf.strip(): return - self._stop_spinner() - self._console.print() - header = f"{self._bot_icon} {self._bot_name}" if self._bot_icon else self._bot_name - self._console.print(f"[cyan]{header}[/cyan]") + self.ensure_header() self._live = Live( self._renderable(), console=self._console, @@ -174,7 +202,6 @@ class StreamRenderer: def pause(self): """Context manager: pause spinner for external output. No-op once streaming has started.""" - from contextlib import nullcontext if self._spinner: return self._spinner.pause() return nullcontext() diff --git a/tests/cli/test_cli_input.py b/tests/cli/test_cli_input.py index 69293f4b8..8b7a79cfc 100644 --- a/tests/cli/test_cli_input.py +++ b/tests/cli/test_cli_input.py @@ -1,4 +1,5 @@ import asyncio +from contextlib import nullcontext from unittest.mock import AsyncMock, MagicMock, call, patch import pytest @@ -96,6 +97,31 @@ def test_print_cli_progress_line_pauses_spinner_before_printing(): assert order == ["start", "stop", "print", "start", "stop"] +def test_print_cli_progress_line_opens_renderer_header_before_trace(): + """Trace lines should appear under the assistant header, not under You.""" + order: list[str] = [] + renderer = MagicMock() + renderer.console.print.side_effect = lambda *_args, **_kwargs: order.append("print") + renderer.ensure_header.side_effect = lambda: order.append("header") + renderer.pause_spinner.return_value = nullcontext() + + commands._print_cli_progress_line("tool running", None, renderer) + + assert order == ["header", "print"] + + +def test_print_cli_progress_line_stops_live_before_trace(): + """A trace line should not leak the current transient Live frame.""" + mock_live = MagicMock() + renderer = stream_mod.StreamRenderer(show_spinner=False) + renderer._live = mock_live + + 
commands._print_cli_progress_line("tool running", None, renderer) + + mock_live.stop.assert_called_once() + assert renderer._live is None + + @pytest.mark.asyncio async def test_print_interactive_progress_line_pauses_spinner_before_printing(): """Interactive progress output should also pause spinner cleanly.""" diff --git a/tests/cli/test_interactive_retry_wait.py b/tests/cli/test_interactive_retry_wait.py index 7ddef1c48..52c27d2c9 100644 --- a/tests/cli/test_interactive_retry_wait.py +++ b/tests/cli/test_interactive_retry_wait.py @@ -50,6 +50,25 @@ async def test_reasoning_displayed_when_show_reasoning_enabled(): assert calls == ["Let me think about this..."] +@pytest.mark.asyncio +async def test_reasoning_delta_displayed_when_show_reasoning_enabled(): + """Streamed reasoning delta frames should use the reasoning renderer.""" + calls: list[str] = [] + channels_config = SimpleNamespace( + send_progress=True, send_tool_hints=False, show_reasoning=True, + ) + msg = SimpleNamespace( + content="I should search first.", + metadata={"_progress": True, "_reasoning_delta": True}, + ) + + with patch("nanobot.cli.commands._print_cli_reasoning", side_effect=lambda t, th, r=None: calls.append(t)): + handled = await commands._maybe_print_interactive_progress(msg, None, channels_config) + + assert handled is True + assert calls == ["I should search first."] + + @pytest.mark.asyncio async def test_reasoning_hidden_when_show_reasoning_disabled(): """Reasoning content should be suppressed when show_reasoning is False.""" From 53831e161199dbfea333e06b6b4202f5e7f67dab Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Wed, 13 May 2026 09:15:53 +0000 Subject: [PATCH 16/17] fix(cli): clear thinking spinner before trace output Co-authored-by: Cursor --- nanobot/cli/stream.py | 13 +++++++++++++ tests/cli/test_cli_input.py | 18 ++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/nanobot/cli/stream.py b/nanobot/cli/stream.py index 382ae9aac..899950fb6 100644 --- 
a/nanobot/cli/stream.py +++ b/nanobot/cli/stream.py @@ -18,6 +18,16 @@ from rich.markdown import Markdown from rich.text import Text +def _clear_current_line(console: Console) -> None: + """Erase a transient status line before printing persistent output.""" + file = console.file + isatty = getattr(file, "isatty", lambda: False) + if not isatty(): + return + file.write("\r\x1b[2K") + file.flush() + + def _make_console() -> Console: """Create a Console that emits plain text when stdout is not a TTY. @@ -37,6 +47,7 @@ class ThinkingSpinner: def __init__(self, console: Console | None = None, bot_name: str = "nanobot"): c = console or _make_console() + self._console = c self._spinner = c.status(f"[dim]{bot_name} is thinking...[/dim]", spinner="dots") self._active = False @@ -48,6 +59,7 @@ class ThinkingSpinner: def __exit__(self, *exc): self._active = False self._spinner.stop() + _clear_current_line(self._console) return False def pause(self): @@ -58,6 +70,7 @@ class ThinkingSpinner: def _ctx(): if self._spinner and self._active: self._spinner.stop() + _clear_current_line(self._console) try: yield finally: diff --git a/tests/cli/test_cli_input.py b/tests/cli/test_cli_input.py index 8b7a79cfc..3f5619c4f 100644 --- a/tests/cli/test_cli_input.py +++ b/tests/cli/test_cli_input.py @@ -1,5 +1,6 @@ import asyncio from contextlib import nullcontext +from io import StringIO from unittest.mock import AsyncMock, MagicMock, call, patch import pytest @@ -97,6 +98,23 @@ def test_print_cli_progress_line_pauses_spinner_before_printing(): assert order == ["start", "stop", "print", "start", "stop"] +def test_thinking_spinner_clears_status_line_when_paused(): + """Stopping the spinner should erase its transient line before output.""" + stream = StringIO() + stream.isatty = lambda: True # type: ignore[method-assign] + mock_console = MagicMock() + mock_console.file = stream + spinner = MagicMock() + mock_console.status.return_value = spinner + + thinking = 
stream_mod.ThinkingSpinner(console=mock_console) + with thinking: + with thinking.pause(): + pass + + assert "\r\x1b[2K" in stream.getvalue() + + def test_print_cli_progress_line_opens_renderer_header_before_trace(): """Trace lines should appear under the assistant header, not under You.""" order: list[str] = [] From 567e95dee63aea426b9620ac894d86d094f3ef16 Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Wed, 13 May 2026 09:18:59 +0000 Subject: [PATCH 17/17] fix(cli): stop spinner before resumed answer deltas Co-authored-by: Cursor --- nanobot/cli/stream.py | 7 +++++-- tests/cli/test_cli_input.py | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/nanobot/cli/stream.py b/nanobot/cli/stream.py index 899950fb6..24a141cdd 100644 --- a/nanobot/cli/stream.py +++ b/nanobot/cli/stream.py @@ -143,10 +143,13 @@ class StreamRenderer: return self._header_printed def ensure_header(self) -> None: - """Print the assistant header once, before trace or answer content.""" + """Stop transient status and print the assistant header once.""" + # A turn can print trace rows before the final answer, then restart the + # spinner while tools run. The next answer delta still needs to stop + # that spinner even though the header was already printed. 
+ self._stop_spinner() if self._header_printed: return - self._stop_spinner() self._console.print() header = f"{self._bot_icon} {self._bot_name}" if self._bot_icon else self._bot_name self._console.print(f"[cyan]{header}[/cyan]") diff --git a/tests/cli/test_cli_input.py b/tests/cli/test_cli_input.py index 3f5619c4f..34046e8d4 100644 --- a/tests/cli/test_cli_input.py +++ b/tests/cli/test_cli_input.py @@ -115,6 +115,24 @@ def test_thinking_spinner_clears_status_line_when_paused(): assert "\r\x1b[2K" in stream.getvalue() +def test_stream_renderer_stops_spinner_even_after_header_printed(): + """A later answer delta must stop the spinner even when header already exists.""" + stream = StringIO() + stream.isatty = lambda: True # type: ignore[method-assign] + mock_console = MagicMock() + mock_console.file = stream + spinner = MagicMock() + mock_console.status.return_value = spinner + + with patch.object(stream_mod, "_make_console", return_value=mock_console): + renderer = stream_mod.StreamRenderer(show_spinner=True) + renderer._header_printed = True + renderer.ensure_header() + + spinner.stop.assert_called_once() + assert "\r\x1b[2K" in stream.getvalue() + + def test_print_cli_progress_line_opens_renderer_header_before_trace(): """Trace lines should appear under the assistant header, not under You.""" order: list[str] = []