From d630ac90d1b88086e79ba595bedfe0abab66eb74 Mon Sep 17 00:00:00 2001
From: Flinn Xie <flinnxie@outlook.com>
Date: Wed, 6 May 2026 01:34:23 +0800
Subject: [PATCH 01/17] fix(cli): prevent TUI content duplication via transient
 Live and renderer routing

Route progress output through the Live's render hook to fix cursor
misalignment that caused content duplication.  The root cause was that
progress/reasoning output used a separate Console instance, bypassing
Rich Live's process_renderables hook.  Also fixes a pre-existing issue
where multiple headers were printed per agent turn.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 nanobot/cli/commands.py                  | 46 ++++++++-----
 nanobot/cli/stream.py                    | 86 ++++++++++++++++--------
 tests/cli/test_interactive_retry_wait.py |  2 +-
 3 files changed, 88 insertions(+), 46 deletions(-)

diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py
index 243280ed1..236d787ce 100644
--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@@ -227,30 +227,37 @@ async def _print_interactive_response(
     await run_in_terminal(_write)
 
 
-def _print_cli_progress_line(text: str, thinking: ThinkingSpinner | None) -> None:
+def _print_cli_progress_line(text: str, thinking: ThinkingSpinner | None, renderer: StreamRenderer | None = None) -> None:
     """Print a CLI progress line, pausing the spinner if needed."""
     if not text.strip():
         return
-    with thinking.pause() if thinking else nullcontext():
-        console.print(f"  [dim]↳ {text}[/dim]")
+    target = renderer.console if renderer else console
+    pause = renderer.pause_spinner() if renderer else (thinking.pause() if thinking else nullcontext())
+    with pause:
+        target.print(f"  [dim]↳ {text}[/dim]")
 
 
-async def _print_interactive_progress_line(text: str, renderer: StreamRenderer | None) -> None:
-    """Print an interactive progress line, pausing the renderer's spinner if needed."""
+async def _print_interactive_progress_line(text: str, thinking: ThinkingSpinner | None, renderer: StreamRenderer | None = None) -> None:
+    """Print an interactive progress line, pausing the spinner if needed."""
     if not text.strip():
         return
-    with renderer.pause() if renderer else nullcontext():
-        await _print_interactive_line(text)
+    if renderer:
+        with renderer.pause_spinner():
+            renderer.console.print(f"  [dim]↳ {text}[/dim]")
+    else:
+        with thinking.pause() if thinking else nullcontext():
+            await _print_interactive_line(text)
 
 
 async def _maybe_print_interactive_progress(
     msg: Any,
-    renderer: StreamRenderer | None,
+    thinking: ThinkingSpinner | None,
     channels_config: Any,
+    renderer: StreamRenderer | None = None,
 ) -> bool:
     metadata = msg.metadata or {}
     if metadata.get("_retry_wait"):
-        await _print_interactive_progress_line(msg.content, renderer)
+        await _print_interactive_progress_line(msg.content, thinking, renderer)
         return True
 
     if not metadata.get("_progress"):
@@ -262,7 +269,7 @@ async def _maybe_print_interactive_progress(
     if channels_config and not is_tool_hint and not channels_config.send_progress:
         return True
 
-    await _print_interactive_progress_line(msg.content, renderer)
+    await _print_interactive_progress_line(msg.content, thinking, renderer)
     return True
 
 
@@ -1121,13 +1128,15 @@ def agent(
     # Shared reference for progress callbacks
     _thinking: ThinkingSpinner | None = None
 
-    async def _cli_progress(content: str, *, tool_hint: bool = False, **_kwargs: Any) -> None:
-        ch = agent_loop.channels_config
-        if ch and tool_hint and not ch.send_tool_hints:
-            return
-        if ch and not tool_hint and not ch.send_progress:
-            return
-        _print_cli_progress_line(content, _thinking)
+    def _make_progress(renderer: StreamRenderer | None = None):
+        async def _cli_progress(content: str, *, tool_hint: bool = False, **_kwargs: Any) -> None:
+            ch = agent_loop.channels_config
+            if ch and tool_hint and not ch.send_tool_hints:
+                return
+            if ch and not tool_hint and not ch.send_progress:
+                return
+            _print_cli_progress_line(content, _thinking, renderer)
+        return _cli_progress
 
     if message:
         # Single message mode — direct call, no bus needed
@@ -1135,7 +1144,7 @@ def agent(
             renderer = StreamRenderer(render_markdown=markdown)
             response = await agent_loop.process_direct(
                 message, session_id,
-                on_progress=_cli_progress,
+                on_progress=_make_progress(renderer),
                 on_stream=renderer.on_delta,
                 on_stream_end=renderer.on_end,
             )
@@ -1206,6 +1215,7 @@ def agent(
                             msg,
                             renderer,
                             agent_loop.channels_config,
+                            renderer,
                         ):
                             continue
 
diff --git a/nanobot/cli/stream.py b/nanobot/cli/stream.py
index b0095f153..807c88fef 100644
--- a/nanobot/cli/stream.py
+++ b/nanobot/cli/stream.py
@@ -1,13 +1,15 @@
 """Streaming renderer for CLI output.
 
-Uses Rich Live with auto_refresh=False for stable, flicker-free
-markdown rendering during streaming. Ellipsis mode handles overflow.
+Uses Rich Live with ``transient=True`` for in-place markdown updates during
+streaming.  After the live display stops, a final clean render is printed
+so the content persists on screen.  ``transient=True`` ensures the live
+area is erased before ``stop()`` returns, avoiding the duplication bug
+that plagued earlier approaches.
 """
 
 from __future__ import annotations
 
 import sys
-import time
 
 from rich.console import Console
 from rich.live import Live
@@ -67,27 +69,38 @@ class ThinkingSpinner:
 
 
 class StreamRenderer:
-    """Rich Live streaming with markdown. auto_refresh=False avoids render races.
+    """Streaming renderer with Rich Live for in-place updates.
 
-    Deltas arrive pre-filtered (no <think> tags) from the agent loop.
+    During streaming: updates content in-place via Rich Live.
+    On end: stops Live (transient=True erases it), then prints final render.
 
     Flow per round:
-      spinner -> first visible delta -> header + Live renders ->
-      on_end -> Live stops (content stays on screen)
+      spinner -> first delta -> header + Live updates ->
+      on_end -> stop Live + final render
     """
 
     def __init__(self, render_markdown: bool = True, show_spinner: bool = True):
         self._md = render_markdown
         self._show_spinner = show_spinner
         self._buf = ""
-        self._live: Live | None = None
-        self._t = 0.0
         self.streamed = False
+        self._header_printed = False
+        self._console = _make_console()
+        self._live: Live | None = None
         self._spinner: ThinkingSpinner | None = None
         self._start_spinner()
 
-    def _render(self):
-        return Markdown(self._buf) if self._md and self._buf else Text(self._buf or "")
+    def _renderable(self):
+        """Create a renderable from the current buffer."""
+        if self._md and self._buf:
+            return Markdown(self._buf)
+        return Text(self._buf or "")
+
+    def _render_str(self) -> str:
+        """Render current buffer to a plain string via Rich."""
+        with self._console.capture() as cap:
+            self._console.print(self._renderable())
+        return cap.get()
 
     def _start_spinner(self) -> None:
         if self._show_spinner:
@@ -99,36 +112,55 @@ class StreamRenderer:
             self._spinner.__exit__(None, None, None)
             self._spinner = None
 
+    @property
+    def console(self) -> Console:
+        """Expose the Live's console so external print functions can use it."""
+        return self._console
+
+    def pause_spinner(self):
+        """Context manager: temporarily stop spinner for clean output."""
+        if self._spinner:
+            return self._spinner.pause()
+        from contextlib import nullcontext
+        return nullcontext()
+
     async def on_delta(self, delta: str) -> None:
         self.streamed = True
         self._buf += delta
-        if self._live is None:
-            if not self._buf.strip():
-                return
-            self._stop_spinner()
-            c = _make_console()
-            c.print()
-            c.print(f"[cyan]{__logo__} nanobot[/cyan]")
-            self._live = Live(self._render(), console=c, auto_refresh=False)
+        if not self._header_printed and self._buf.strip():
+            self._console.print()
+            self._console.print(f"[cyan]{__logo__} nanobot[/cyan]")
+            self._header_printed = True
+        self._stop_spinner()
+        if not self._live:
+            self._live = Live(
+                self._renderable(),
+                console=self._console,
+                auto_refresh=False,
+                transient=True,
+            )
             self._live.start()
-        now = time.monotonic()
-        if (now - self._t) > 0.15:
-            self._live.update(self._render())
-            self._live.refresh()
-            self._t = now
+        else:
+            self._live.update(self._renderable())
+        self._live.refresh()
 
     async def on_end(self, *, resuming: bool = False) -> None:
         if self._live:
-            self._live.update(self._render())
+            # Double-refresh to sync _shape before stop() calls refresh().
+            self._live.refresh()
+            self._live.update(self._renderable())
             self._live.refresh()
             self._live.stop()
             self._live = None
         self._stop_spinner()
+        if self._header_printed and self._buf.strip():
+            # Print final rendered content (persists after Live is gone).
+            out = sys.stdout
+            out.write(self._render_str())
+            out.flush()
         if resuming:
             self._buf = ""
             self._start_spinner()
-        else:
-            _make_console().print()
 
     def stop_for_input(self) -> None:
         """Stop spinner before user input to avoid prompt_toolkit conflicts."""
diff --git a/tests/cli/test_interactive_retry_wait.py b/tests/cli/test_interactive_retry_wait.py
index 5cc217c56..e58102dcd 100644
--- a/tests/cli/test_interactive_retry_wait.py
+++ b/tests/cli/test_interactive_retry_wait.py
@@ -17,7 +17,7 @@ async def test_interactive_retry_wait_is_rendered_as_progress_even_when_progress
         metadata={"_retry_wait": True},
     )
 
-    async def fake_print(text: str, active_thinking: object | None) -> None:
+    async def fake_print(text: str, active_thinking: object | None, renderer=None) -> None:
         calls.append((text, active_thinking))
 
     with patch("nanobot.cli.commands._print_interactive_progress_line", side_effect=fake_print):

From 3a27af0018b106f4b9212289c75da03d3e67da62 Mon Sep 17 00:00:00 2001
From: Flinn Xie <flinnxie@outlook.com>
Date: Wed, 6 May 2026 01:35:53 +0800
Subject: [PATCH 02/17] feat(cli): display model reasoning content during
 streaming
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add show_reasoning config (default: False) to display model
thinking/reasoning content in the TUI during streaming.  Reasoning
is emitted via a new emit_reasoning hook on AgentHook, gated by the
channels config.  Display uses a ✻ prefix with dim italic styling.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 nanobot/agent/hook.py                    |  6 +++
 nanobot/agent/loop.py                    | 11 +++++
 nanobot/agent/runner.py                  |  3 ++
 nanobot/cli/commands.py                  | 27 +++++++++--
 nanobot/cli/stream.py                    | 11 ++---
 nanobot/config/schema.py                 |  1 +
 tests/agent/test_hook_composite.py       | 23 ++++++++-
 tests/cli/test_cli_input.py              | 54 ++++++++++++++++++++--
 tests/cli/test_interactive_retry_wait.py | 59 ++++++++++++++++++++++++
 9 files changed, 182 insertions(+), 13 deletions(-)

diff --git a/nanobot/agent/hook.py b/nanobot/agent/hook.py
index d0106cfb6..5e4ea4d4d 100644
--- a/nanobot/agent/hook.py
+++ b/nanobot/agent/hook.py
@@ -48,6 +48,9 @@ class AgentHook:
     async def before_execute_tools(self, context: AgentHookContext) -> None:
         pass
 
+    async def emit_reasoning(self, reasoning_content: str | None) -> None:
+        pass
+
     async def after_iteration(self, context: AgentHookContext) -> None:
         pass
 
@@ -95,6 +98,9 @@ class CompositeHook(AgentHook):
     async def before_execute_tools(self, context: AgentHookContext) -> None:
         await self._for_each_hook_safe("before_execute_tools", context)
 
+    async def emit_reasoning(self, reasoning_content: str | None) -> None:
+        await self._for_each_hook_safe("emit_reasoning", reasoning_content)
+
     async def after_iteration(self, context: AgentHookContext) -> None:
         await self._for_each_hook_safe("after_iteration", context)
 
diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index 330c82357..e12bf53c9 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -155,6 +155,14 @@ class _LoopHook(AgentHook):
             session_key=self._session_key,
         )
 
+    async def emit_reasoning(self, reasoning_content: str | None) -> None:
+        """Send reasoning/thinking content as progress before the main answer."""
+        ch = self._loop.channels_config
+        if not ch or not ch.show_reasoning:
+            return
+        if self._on_progress and reasoning_content:
+            await self._on_progress(reasoning_content, reasoning=True)
+
     async def after_iteration(self, context: AgentHookContext) -> None:
         if (
             self._on_progress
@@ -1114,10 +1122,13 @@ class AgentLoop:
             *,
             tool_hint: bool = False,
             tool_events: list[dict[str, Any]] | None = None,
+            reasoning: bool = False,
         ) -> None:
             meta = dict(msg.metadata or {})
             meta["_progress"] = True
             meta["_tool_hint"] = tool_hint
+            if reasoning:
+                meta["_reasoning"] = True
             if tool_events:
                 meta["_tool_events"] = tool_events
             await self.bus.publish_outbound(
diff --git a/nanobot/agent/runner.py b/nanobot/agent/runner.py
index 7fe92ad51..2ff2cf045 100644
--- a/nanobot/agent/runner.py
+++ b/nanobot/agent/runner.py
@@ -282,6 +282,9 @@ class AgentRunner:
             context.tool_calls = list(response.tool_calls)
             self._accumulate_usage(usage, raw_usage)
 
+            if response.reasoning_content:
+                await hook.emit_reasoning(response.reasoning_content)
+
             if response.should_execute_tools:
                 tool_calls = list(response.tool_calls)
                 ask_index = next((i for i, tc in enumerate(tool_calls) if tc.name == "ask_user"), None)
diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py
index 236d787ce..1c835962a 100644
--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@@ -237,6 +237,16 @@ def _print_cli_progress_line(text: str, thinking: ThinkingSpinner | None, render
         target.print(f"  [dim]↳ {text}[/dim]")
 
 
+def _print_cli_reasoning(text: str, thinking: ThinkingSpinner | None, renderer: StreamRenderer | None = None) -> None:
+    """Print reasoning/thinking content in a distinct style."""
+    if not text.strip():
+        return
+    target = renderer.console if renderer else console
+    pause = renderer.pause_spinner() if renderer else (thinking.pause() if thinking else nullcontext())
+    with pause:
+        target.print(f"[dim italic]✻ {text}[/dim italic]")
+
+
 async def _print_interactive_progress_line(text: str, thinking: ThinkingSpinner | None, renderer: StreamRenderer | None = None) -> None:
     """Print an interactive progress line, pausing the spinner if needed."""
     if not text.strip():
@@ -264,12 +274,18 @@ async def _maybe_print_interactive_progress(
         return False
 
     is_tool_hint = metadata.get("_tool_hint", False)
+    is_reasoning = metadata.get("_reasoning", False)
     if channels_config and is_tool_hint and not channels_config.send_tool_hints:
         return True
     if channels_config and not is_tool_hint and not channels_config.send_progress:
         return True
+    if is_reasoning and channels_config and not channels_config.show_reasoning:
+        return True
 
-    await _print_interactive_progress_line(msg.content, thinking, renderer)
+    if is_reasoning:
+        _print_cli_reasoning(msg.content, thinking, renderer)
+    else:
+        await _print_interactive_progress_line(msg.content, thinking, renderer)
     return True
 
 
@@ -1129,13 +1145,18 @@ def agent(
     _thinking: ThinkingSpinner | None = None
 
     def _make_progress(renderer: StreamRenderer | None = None):
-        async def _cli_progress(content: str, *, tool_hint: bool = False, **_kwargs: Any) -> None:
+        async def _cli_progress(content: str, *, tool_hint: bool = False, reasoning: bool = False, **_kwargs: Any) -> None:
             ch = agent_loop.channels_config
             if ch and tool_hint and not ch.send_tool_hints:
                 return
             if ch and not tool_hint and not ch.send_progress:
                 return
-            _print_cli_progress_line(content, _thinking, renderer)
+            if reasoning and ch and not ch.show_reasoning:
+                return
+            if reasoning:
+                _print_cli_reasoning(content, _thinking, renderer)
+            else:
+                _print_cli_progress_line(content, _thinking, renderer)
         return _cli_progress
 
     if message:
diff --git a/nanobot/cli/stream.py b/nanobot/cli/stream.py
index 807c88fef..ec7f0a96c 100644
--- a/nanobot/cli/stream.py
+++ b/nanobot/cli/stream.py
@@ -84,7 +84,6 @@ class StreamRenderer:
         self._show_spinner = show_spinner
         self._buf = ""
         self.streamed = False
-        self._header_printed = False
         self._console = _make_console()
         self._live: Live | None = None
         self._spinner: ThinkingSpinner | None = None
@@ -127,12 +126,12 @@ class StreamRenderer:
     async def on_delta(self, delta: str) -> None:
         self.streamed = True
         self._buf += delta
-        if not self._header_printed and self._buf.strip():
+        if self._live is None:
+            if not self._buf.strip():
+                return
+            self._stop_spinner()
             self._console.print()
             self._console.print(f"[cyan]{__logo__} nanobot[/cyan]")
-            self._header_printed = True
-        self._stop_spinner()
-        if not self._live:
             self._live = Live(
                 self._renderable(),
                 console=self._console,
@@ -153,7 +152,7 @@ class StreamRenderer:
             self._live.stop()
             self._live = None
         self._stop_spinner()
-        if self._header_printed and self._buf.strip():
+        if self._buf.strip():
             # Print final rendered content (persists after Live is gone).
             out = sys.stdout
             out.write(self._render_str())
diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py
index 47f2babcd..66a7a75aa 100644
--- a/nanobot/config/schema.py
+++ b/nanobot/config/schema.py
@@ -27,6 +27,7 @@ class ChannelsConfig(Base):
 
     send_progress: bool = True  # stream agent's text progress to the channel
     send_tool_hints: bool = False  # stream tool-call hints (e.g. read_file("…"))
+    show_reasoning: bool = False  # show model reasoning/thinking content
     send_max_retries: int = Field(default=3, ge=0, le=10)  # Max delivery attempts (initial send included)
     transcription_provider: str = "groq"  # Voice transcription backend: "groq" or "openai"
     transcription_language: str | None = Field(default=None, pattern=r"^[a-z]{2,3}$")  # Optional ISO-639-1 hint for audio transcription
diff --git a/tests/agent/test_hook_composite.py b/tests/agent/test_hook_composite.py
index 8971d48ec..9b6c2820d 100644
--- a/tests/agent/test_hook_composite.py
+++ b/tests/agent/test_hook_composite.py
@@ -13,6 +13,17 @@ def _ctx() -> AgentHookContext:
     return AgentHookContext(iteration=0, messages=[])
 
 
+# ---------------------------------------------------------------------------
+# Base AgentHook emit_reasoning: no-op
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_base_hook_emit_reasoning_is_noop():
+    hook = AgentHook()
+    await hook.emit_reasoning("should not raise")
+
+
 # ---------------------------------------------------------------------------
 # Fan-out: every hook is called in order
 # ---------------------------------------------------------------------------
@@ -45,6 +56,9 @@ async def test_composite_fans_out_all_async_methods():
         async def before_iteration(self, context: AgentHookContext) -> None:
             events.append("before_iteration")
 
+        async def emit_reasoning(self, reasoning_content: str | None) -> None:
+            events.append(f"emit_reasoning:{reasoning_content}")
+
         async def on_stream(self, context: AgentHookContext, delta: str) -> None:
             events.append(f"on_stream:{delta}")
 
@@ -61,6 +75,7 @@ async def test_composite_fans_out_all_async_methods():
     ctx = _ctx()
 
     await hook.before_iteration(ctx)
+    await hook.emit_reasoning("thinking...")
     await hook.on_stream(ctx, "hi")
     await hook.on_stream_end(ctx, resuming=True)
     await hook.before_execute_tools(ctx)
@@ -68,6 +83,7 @@ async def test_composite_fans_out_all_async_methods():
 
     assert events == [
         "before_iteration", "before_iteration",
+        "emit_reasoning:thinking...", "emit_reasoning:thinking...",
         "on_stream:hi", "on_stream:hi",
         "on_stream_end:True", "on_stream_end:True",
         "before_execute_tools", "before_execute_tools",
@@ -120,6 +136,8 @@ async def test_composite_error_isolation_all_async():
     calls: list[str] = []
 
     class Bad(AgentHook):
+        async def emit_reasoning(self, reasoning_content):
+            raise RuntimeError("err")
         async def on_stream_end(self, context, *, resuming):
             raise RuntimeError("err")
         async def before_execute_tools(self, context):
@@ -128,6 +146,8 @@ async def test_composite_error_isolation_all_async():
             raise RuntimeError("err")
 
     class Good(AgentHook):
+        async def emit_reasoning(self, reasoning_content):
+            calls.append("emit_reasoning")
         async def on_stream_end(self, context, *, resuming):
             calls.append("on_stream_end")
         async def before_execute_tools(self, context):
@@ -137,10 +157,11 @@ async def test_composite_error_isolation_all_async():
 
     hook = CompositeHook([Bad(), Good()])
     ctx = _ctx()
+    await hook.emit_reasoning("test")
     await hook.on_stream_end(ctx, resuming=False)
     await hook.before_execute_tools(ctx)
     await hook.after_iteration(ctx)
-    assert calls == ["on_stream_end", "before_execute_tools", "after_iteration"]
+    assert calls == ["emit_reasoning", "on_stream_end", "before_execute_tools", "after_iteration"]
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/cli/test_cli_input.py b/tests/cli/test_cli_input.py
index e648e818c..69293f4b8 100644
--- a/tests/cli/test_cli_input.py
+++ b/tests/cli/test_cli_input.py
@@ -156,17 +156,65 @@ def test_stream_renderer_stop_for_input_stops_spinner():
     # Create renderer with mocked console
     with patch.object(stream_mod, "_make_console", return_value=mock_console):
         renderer = stream_mod.StreamRenderer(show_spinner=True)
-        
+
         # Verify spinner started
         spinner.start.assert_called_once()
-        
+
         # Stop for input
         renderer.stop_for_input()
-        
+
         # Verify spinner stopped
         spinner.stop.assert_called_once()
 
 
+@pytest.mark.asyncio
+async def test_on_end_writes_final_content_to_stdout_after_stopping_live():
+    """on_end should stop Live (transient erases it) then print final content to stdout."""
+    mock_live = MagicMock()
+    mock_console = MagicMock()
+    mock_console.capture.return_value.__enter__ = MagicMock(
+        return_value=MagicMock(get=lambda: "final output\n")
+    )
+    mock_console.capture.return_value.__exit__ = MagicMock(return_value=False)
+
+    with patch.object(stream_mod, "_make_console", return_value=mock_console):
+        renderer = stream_mod.StreamRenderer(show_spinner=False)
+        renderer._live = mock_live
+        renderer._buf = "final output"
+
+        written: list[str] = []
+        with patch("sys.stdout") as mock_stdout:
+            mock_stdout.write = lambda s: written.append(s)
+            mock_stdout.flush = MagicMock()
+            await renderer.on_end()
+
+    mock_live.stop.assert_called_once()
+    assert renderer._live is None
+    assert written == ["final output\n"]
+
+
+@pytest.mark.asyncio
+async def test_on_end_resuming_clears_buffer_and_restarts_spinner():
+    """on_end(resuming=True) should reset state for the next iteration."""
+    spinner = MagicMock()
+    mock_console = MagicMock()
+    mock_console.status.return_value = spinner
+    mock_console.capture.return_value.__enter__ = MagicMock(
+        return_value=MagicMock(get=lambda: "")
+    )
+    mock_console.capture.return_value.__exit__ = MagicMock(return_value=False)
+
+    with patch.object(stream_mod, "_make_console", return_value=mock_console):
+        renderer = stream_mod.StreamRenderer(show_spinner=True)
+        renderer._buf = "some content"
+
+        await renderer.on_end(resuming=True)
+
+    assert renderer._buf == ""
+    # Spinner should have been restarted (start called twice: __init__ + resuming)
+    assert spinner.start.call_count == 2
+
+
 def test_make_console_force_terminal_when_stdout_is_tty():
     """Console should set force_terminal=True when stdout is a TTY (rich output)."""
     import sys
diff --git a/tests/cli/test_interactive_retry_wait.py b/tests/cli/test_interactive_retry_wait.py
index e58102dcd..e693b057c 100644
--- a/tests/cli/test_interactive_retry_wait.py
+++ b/tests/cli/test_interactive_retry_wait.py
@@ -29,3 +29,62 @@ async def test_interactive_retry_wait_is_rendered_as_progress_even_when_progress
 
     assert handled is True
     assert calls == [("Model request failed, retry in 2s (attempt 1).", thinking)]
+
+
+@pytest.mark.asyncio
+async def test_reasoning_displayed_when_show_reasoning_enabled():
+    """Reasoning content should be displayed when show_reasoning is True."""
+    calls: list[str] = []
+    channels_config = SimpleNamespace(
+        send_progress=True, send_tool_hints=False, show_reasoning=True,
+    )
+    msg = SimpleNamespace(
+        content="Let me think about this...",
+        metadata={"_progress": True, "_reasoning": True},
+    )
+
+    with patch("nanobot.cli.commands._print_cli_reasoning", side_effect=lambda t, th, r=None: calls.append(t)):
+        handled = await commands._maybe_print_interactive_progress(msg, None, channels_config)
+
+    assert handled is True
+    assert calls == ["Let me think about this..."]
+
+
+@pytest.mark.asyncio
+async def test_reasoning_hidden_when_show_reasoning_disabled():
+    """Reasoning content should be suppressed when show_reasoning is False."""
+    channels_config = SimpleNamespace(
+        send_progress=True, send_tool_hints=False, show_reasoning=False,
+    )
+    msg = SimpleNamespace(
+        content="Let me think about this...",
+        metadata={"_progress": True, "_reasoning": True},
+    )
+
+    with patch("nanobot.cli.commands._print_cli_reasoning") as mock_reasoning:
+        handled = await commands._maybe_print_interactive_progress(msg, None, channels_config)
+
+    assert handled is True
+    mock_reasoning.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_non_reasoning_progress_not_affected_by_show_reasoning():
+    """Regular progress lines should display regardless of show_reasoning."""
+    calls: list[str] = []
+    channels_config = SimpleNamespace(
+        send_progress=True, send_tool_hints=False, show_reasoning=False,
+    )
+    msg = SimpleNamespace(
+        content="working on it...",
+        metadata={"_progress": True},
+    )
+
+    async def fake_print(text: str, thinking=None, renderer=None):
+        calls.append(text)
+
+    with patch("nanobot.cli.commands._print_interactive_progress_line", side_effect=fake_print):
+        handled = await commands._maybe_print_interactive_progress(msg, None, channels_config)
+
+    assert handled is True
+    assert calls == ["working on it..."]

From 3a851f8f8de09dd0c57b295958b0b7c67d362d0a Mon Sep 17 00:00:00 2001
From: Flinn Xie <flinnxie@outlook.com>
Date: Tue, 12 May 2026 23:02:59 +0800
Subject: [PATCH 03/17] feat(reasoning): add inline think tag extraction and
 Anthropic thinking_blocks support

Add extract_think() and emit_incremental_think() helpers to extract thinking content from inline <think> and <thought> tags in the content field. This handles models served via Ollama, self-hosted vLLM, or other compatible endpoints that embed reasoning as inline tags instead of using the dedicated reasoning_content API field.

Also adds support for Anthropic extended thinking: when a response has no reasoning_content, reasoning is taken from the entries of its thinking_blocks array whose type is "thinking".

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
---
 nanobot/agent/loop.py           |   9 ++-
 nanobot/agent/runner.py         |  28 ++++++-
 nanobot/utils/helpers.py        |  41 +++++++++++
 tests/agent/test_runner.py      | 126 ++++++++++++++++++++++++++++++++
 tests/utils/test_strip_think.py |  83 ++++++++++++++++++++-
 5 files changed, 283 insertions(+), 4 deletions(-)

diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index e12bf53c9..9d2899b04 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -101,17 +101,23 @@ class _LoopHook(AgentHook):
         self._metadata = metadata or {}
         self._session_key = session_key
         self._stream_buf = ""
+        self._emitted_thinking = ""
 
     def wants_streaming(self) -> bool:
         return self._on_stream is not None
 
     async def on_stream(self, context: AgentHookContext, delta: str) -> None:
-        from nanobot.utils.helpers import strip_think
+        from nanobot.utils.helpers import emit_incremental_think, strip_think
 
         prev_clean = strip_think(self._stream_buf)
         self._stream_buf += delta
         new_clean = strip_think(self._stream_buf)
         incremental = new_clean[len(prev_clean) :]
+
+        self._emitted_thinking = await emit_incremental_think(
+            self._stream_buf, self._emitted_thinking, self.emit_reasoning,
+        )
+
         if incremental and self._on_stream:
             await self._on_stream(incremental)
 
@@ -119,6 +125,7 @@ class _LoopHook(AgentHook):
         if self._on_stream_end:
             await self._on_stream_end(resuming=resuming)
         self._stream_buf = ""
+        self._emitted_thinking = ""
 
     async def before_iteration(self, context: AgentHookContext) -> None:
         self._loop._current_iteration = context.iteration
diff --git a/nanobot/agent/runner.py b/nanobot/agent/runner.py
index 2ff2cf045..9a1cc6d65 100644
--- a/nanobot/agent/runner.py
+++ b/nanobot/agent/runner.py
@@ -18,8 +18,10 @@ from nanobot.agent.tools.registry import ToolRegistry
 from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
 from nanobot.utils.helpers import (
     build_assistant_message,
+    emit_incremental_think,
     estimate_message_tokens,
     estimate_prompt_tokens_chain,
+    extract_think,
     find_legal_message_start,
     maybe_persist_tool_result,
     strip_think,
@@ -283,7 +285,23 @@ class AgentRunner:
             self._accumulate_usage(usage, raw_usage)
 
             if response.reasoning_content:
-                await hook.emit_reasoning(response.reasoning_content)
+                if not context.streamed_content:
+                    await hook.emit_reasoning(response.reasoning_content)
+                if response.content:
+                    response.content = strip_think(response.content)
+            elif response.thinking_blocks:
+                # Anthropic extended thinking: extract from thinking_blocks.
+                if not context.streamed_content:
+                    parts = [tb.get("thinking", "") for tb in response.thinking_blocks if tb.get("type") == "thinking"]
+                    if parts:
+                        await hook.emit_reasoning("\n\n".join(parts))
+            elif response.content:
+                inline_thinking, clean_content = extract_think(response.content)
+                if inline_thinking:
+                    # Only emit if streaming didn't already handle it.
+                    if not context.streamed_content:
+                        await hook.emit_reasoning(inline_thinking)
+                    response.content = clean_content
 
             if response.should_execute_tools:
                 tool_calls = list(response.tool_calls)
@@ -636,15 +654,21 @@ class AgentRunner:
             )
         elif wants_progress_streaming:
             stream_buf = ""
+            emitted_thinking = ""
 
             async def _stream_progress(delta: str) -> None:
-                nonlocal stream_buf
+                nonlocal stream_buf, emitted_thinking
                 if not delta:
                     return
                 prev_clean = strip_think(stream_buf)
                 stream_buf += delta
                 new_clean = strip_think(stream_buf)
                 incremental = new_clean[len(prev_clean):]
+
+                emitted_thinking = await emit_incremental_think(
+                    stream_buf, emitted_thinking, hook.emit_reasoning,
+                )
+
                 if incremental:
                     context.streamed_content = True
                     await spec.progress_callback(incremental)
diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py
index b047e24d2..5301f4885 100644
--- a/nanobot/utils/helpers.py
+++ b/nanobot/utils/helpers.py
@@ -71,6 +71,47 @@ def strip_think(text: str) -> str:
     return text.strip()
 
 
+def extract_think(text: str) -> tuple[str | None, str]:
+    """Extract thinking/reasoning content from <think> and <thought> tags.
+
+    Returns (thinking_text, cleaned_text) where:
+      - thinking_text: concatenated content from all <think>...</think> and
+        <thought>...</thought> blocks, or None if none found.
+      - cleaned_text: the input with all thinking blocks removed (same as
+        strip_think()).
+
+    Only extracts from well-formed closed blocks. Unclosed trailing tags
+    (common during streaming) are stripped without extraction — use
+    strip_think() for pure streaming cleanup.
+    """
+    parts: list[str] = []
+    for m in re.finditer(r"<think>([\s\S]*?)</think>", text):
+        parts.append(m.group(1).strip())
+    for m in re.finditer(r"<thought>([\s\S]*?)</thought>", text):
+        parts.append(m.group(1).strip())
+    thinking = "\n\n".join(parts) if parts else None
+    return thinking, strip_think(text)
+
+
+async def emit_incremental_think(
+    buf: str,
+    emitted: str,
+    emit_fn: Any,
+) -> str:
+    """Extract new thinking from buf and emit if not yet emitted.
+
+    Returns the updated emitted state.  *emit_fn* is an async callable
+    that accepts a single reasoning string (e.g. ``hook.emit_reasoning``).
+    """
+    thinking, _ = extract_think(buf)
+    if thinking and thinking != emitted:
+        new = thinking[len(emitted):]
+        if new.strip():
+            await emit_fn(new.strip())
+        return thinking
+    return emitted
+
+
 def detect_image_mime(data: bytes) -> str | None:
     """Detect image MIME type from magic bytes, ignoring file extension."""
     if data[:8] == b"\x89PNG\r\n\x1a\n":
diff --git a/tests/agent/test_runner.py b/tests/agent/test_runner.py
index b821d9bab..850e3caea 100644
--- a/tests/agent/test_runner.py
+++ b/tests/agent/test_runner.py
@@ -101,6 +101,132 @@ async def test_runner_preserves_reasoning_fields_and_tool_results():
     )
 
 
+@pytest.mark.asyncio
+async def test_runner_emits_anthropic_thinking_blocks():
+    from nanobot.agent.hook import AgentHook, AgentHookContext
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock()
+    emitted_reasoning: list[str] = []
+
+    async def chat_with_retry(**kwargs):
+        return LLMResponse(
+            content="The answer is 42.",
+            thinking_blocks=[
+                {"type": "thinking", "thinking": "Let me analyze this step by step.", "signature": "sig1"},
+                {"type": "thinking", "thinking": "After careful consideration.", "signature": "sig2"},
+            ],
+            tool_calls=[],
+            usage={"prompt_tokens": 5, "completion_tokens": 3},
+        )
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    class ReasoningHook(AgentHook):
+        async def emit_reasoning(self, reasoning_content: str | None) -> None:
+            if reasoning_content:
+                emitted_reasoning.append(reasoning_content)
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "question"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=3,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        hook=ReasoningHook(),
+    ))
+
+    assert result.final_content == "The answer is 42."
+    assert len(emitted_reasoning) == 1
+    assert "Let me analyze this" in emitted_reasoning[0]
+    assert "After careful consideration" in emitted_reasoning[0]
+
+
+@pytest.mark.asyncio
+async def test_runner_emits_inline_think_content_as_reasoning():
+    """Models returning <think>...</think> in content should have thinking extracted and emitted."""
+    from nanobot.agent.hook import AgentHook, AgentHookContext
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock()
+    emitted_reasoning: list[str] = []
+
+    async def chat_with_retry(**kwargs):
+        return LLMResponse(
+            content="<think>Let me think about this...\nThe answer is 42.</think>The answer is 42.",
+            tool_calls=[],
+            usage={"prompt_tokens": 5, "completion_tokens": 3},
+        )
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    class ReasoningHook(AgentHook):
+        async def emit_reasoning(self, reasoning_content: str | None) -> None:
+            if reasoning_content:
+                emitted_reasoning.append(reasoning_content)
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "what is the answer?"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=3,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        hook=ReasoningHook(),
+    ))
+
+    assert result.final_content == "The answer is 42."
+    assert len(emitted_reasoning) == 1
+    assert "Let me think about this" in emitted_reasoning[0]
+    assert "The answer is 42" in emitted_reasoning[0]
+
+
+@pytest.mark.asyncio
+async def test_runner_prefers_reasoning_content_over_inline_think():
+    from nanobot.agent.hook import AgentHook, AgentHookContext
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock()
+    emitted_reasoning: list[str] = []
+
+    async def chat_with_retry(**kwargs):
+        return LLMResponse(
+            content="<think>inline thinking</think>The answer.",
+            reasoning_content="dedicated reasoning field",
+            tool_calls=[],
+            usage={"prompt_tokens": 5, "completion_tokens": 3},
+        )
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    class ReasoningHook(AgentHook):
+        async def emit_reasoning(self, reasoning_content: str | None) -> None:
+            if reasoning_content:
+                emitted_reasoning.append(reasoning_content)
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "question"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=3,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        hook=ReasoningHook(),
+    ))
+
+    assert result.final_content == "The answer."
+    # Only the dedicated field should be emitted, not the inline <think> content
+    assert len(emitted_reasoning) == 1
+    assert emitted_reasoning[0] == "dedicated reasoning field"
+
+
 @pytest.mark.asyncio
 async def test_runner_calls_hooks_in_order():
     from nanobot.agent.hook import AgentHook, AgentHookContext
diff --git a/tests/utils/test_strip_think.py b/tests/utils/test_strip_think.py
index 5db93e658..65d952ad1 100644
--- a/tests/utils/test_strip_think.py
+++ b/tests/utils/test_strip_think.py
@@ -1,4 +1,4 @@
-from nanobot.utils.helpers import strip_think
+from nanobot.utils.helpers import extract_think, strip_think
 
 
 class TestStripThinkTag:
@@ -144,3 +144,84 @@ class TestStripThinkConservativePreserve:
     def test_literal_channel_marker_in_code_block_preserved(self):
         text = "Example:\n```\nif line.startswith('<channel|>'):\n    skip()\n```"
         assert strip_think(text) == text
+
+
+class TestExtractThink:
+
+    def test_no_think_tags(self):
+        thinking, clean = extract_think("Hello World")
+        assert thinking is None
+        assert clean == "Hello World"
+
+    def test_single_think_block(self):
+        text = "Hello <think>reasoning content\nhere</think> World"
+        thinking, clean = extract_think(text)
+        assert thinking == "reasoning content\nhere"
+        assert clean == "Hello  World"
+
+    def test_single_thought_block(self):
+        text = "Hello <thought>reasoning content</thought> World"
+        thinking, clean = extract_think(text)
+        assert thinking == "reasoning content"
+        assert clean == "Hello  World"
+
+    def test_multiple_think_blocks(self):
+        text = "A<think>first</think>B<thought>second</thought>C"
+        thinking, clean = extract_think(text)
+        assert thinking == "first\n\nsecond"
+        assert clean == "ABC"
+
+    def test_think_only_no_content(self):
+        text = "<think>just thinking</think>"
+        thinking, clean = extract_think(text)
+        assert thinking == "just thinking"
+        assert clean == ""
+
+    def test_unclosed_think_not_extracted(self):
+        # Unclosed blocks at start are stripped but NOT extracted
+        text = "<think>unclosed thinking..."
+        thinking, clean = extract_think(text)
+        assert thinking is None
+        assert clean == ""
+
+    def test_empty_think_block(self):
+        text = "Hello <think></think> World"
+        thinking, clean = extract_think(text)
+        # Empty blocks result in empty string after strip
+        assert thinking == ""
+        assert clean == "Hello  World"
+
+    def test_think_with_whitespace_only(self):
+        text = "Hello <think>   \n World"
+        thinking, clean = extract_think(text)
+        assert thinking is None
+        assert clean == "Hello <think>   \n World"
+
+    def test_mixed_think_and_thought(self):
+        text = "Start<think>first reasoning</think>middle<thought>second reasoning</thought>End"
+        thinking, clean = extract_think(text)
+        assert thinking == "first reasoning\n\nsecond reasoning"
+        assert clean == "StartmiddleEnd"
+
+    def test_real_world_ollama_response(self):
+        text = """<think>
+The user is asking about Python list comprehensions.
+Let me explain the syntax and give examples.
+</think>
+
+List comprehensions in Python provide a concise way to create lists. Here's the syntax:
+
+```python
+[expression for item in iterable if condition]
+```
+
+For example:
+```python
+squares = [x**2 for x in range(10)]
+```"""
+        thinking, clean = extract_think(text)
+        assert "list comprehensions" in thinking.lower()
+        assert "Let me explain" in thinking
+        assert "List comprehensions in Python" in clean
+        assert "<think>" not in clean
+        assert "</think>" not in clean

From 352aaf0627385126929af011f08273c2e4f8b9aa Mon Sep 17 00:00:00 2001
From: Xubin Ren <xubinrencs@gmail.com>
Date: Tue, 12 May 2026 17:13:42 +0000
Subject: [PATCH 04/17] refactor(reasoning): unify reasoning extraction across
 providers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reasoning surfacing was split across three branches in runner.py plus
two separate streaming buffers (loop hook and runner progress stream),
with three independent display-side gates in the CLI. This commit
collapses the policy into one source of truth and fixes two real bugs:

- Structured `reasoning_content` was suppressed whenever the answer was
  streamed, because the runner gated emission on `streamed_content`.
  Providers don't stream `reasoning_content`; it only arrives on the
  final response, so the answer stream and the reasoning channel are
  independent. Added `streamed_reasoning` to `AgentHookContext` to track
  the right bit.
- `channels.showReasoning` was subordinated to `sendProgress`. They are
  orthogonal — turning off progress streaming shouldn't silence
  reasoning. Reworked the CLI gates accordingly.

Single-helper consolidation:

- `extract_reasoning(reasoning_content, thinking_blocks, content)`
  returns `(reasoning_text, cleaned_content)` with a defined fallback
  order: dedicated field → Anthropic thinking_blocks → inline
  `<think>`/`<thought>` tags. Responses that expose none of these
  yield `(None, cleaned_content)`; the only cost is the inline-tag scan.
- `IncrementalThinkExtractor` replaces the ad-hoc `emit_incremental_think`
  function and its hand-rolled "emitted cursor" state in both the loop
  hook and the runner progress stream.

Also documented the new `showReasoning` channel option in
docs/configuration.md and noted its independence from sendProgress.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 docs/configuration.md                    |   1 +
 nanobot/agent/hook.py                    |   1 +
 nanobot/agent/loop.py                    |  13 ++-
 nanobot/agent/runner.py                  |  40 ++++-----
 nanobot/cli/commands.py                  |  24 +++---
 nanobot/utils/helpers.py                 |  96 +++++++++++++++------
 tests/agent/test_runner.py               | 105 +++++++++++++++++++++++
 tests/cli/test_interactive_retry_wait.py |  23 +++++
 tests/utils/test_strip_think.py          |  48 ++++++++++-
 9 files changed, 281 insertions(+), 70 deletions(-)

diff --git a/docs/configuration.md b/docs/configuration.md
index 01d55c20b..01ef46814 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -677,6 +677,7 @@ Global settings that apply to all channels. Configure under the `channels` secti
 |---------|---------|-------------|
 | `sendProgress` | `true` | Stream agent's text progress to the channel |
 | `sendToolHints` | `false` | Stream tool-call hints (e.g. `read_file("…")`) |
+| `showReasoning` | `false` | Surface model reasoning/thinking content (DeepSeek-R1 `reasoning_content`, Anthropic `thinking_blocks`, inline `<think>` tags). Independent of `sendProgress`. |
 | `sendMaxRetries` | `3` | Max delivery attempts per outbound message, including the initial send (0-10 configured, minimum 1 actual attempt) |
 | `transcriptionProvider` | `"groq"` | Voice transcription backend: `"groq"` (free tier, default) or `"openai"`. API key is auto-resolved from the matching provider config. |
 | `transcriptionLanguage` | `null` | Optional ISO-639-1 language hint for audio transcription, e.g. `"en"`, `"ko"`, `"ja"`. |
diff --git a/nanobot/agent/hook.py b/nanobot/agent/hook.py
index 5e4ea4d4d..86775742d 100644
--- a/nanobot/agent/hook.py
+++ b/nanobot/agent/hook.py
@@ -22,6 +22,7 @@ class AgentHookContext:
     tool_results: list[Any] = field(default_factory=list)
     tool_events: list[dict[str, str]] = field(default_factory=list)
     streamed_content: bool = False
+    streamed_reasoning: bool = False
     final_content: str | None = None
     stop_reason: str | None = None
     error: str | None = None
diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index 9d2899b04..028d9ddd9 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -48,7 +48,7 @@ from nanobot.providers.factory import ProviderSnapshot
 from nanobot.session.manager import Session, SessionManager
 from nanobot.utils.artifacts import generated_image_paths_from_messages
 from nanobot.utils.document import extract_documents
-from nanobot.utils.helpers import image_placeholder_text
+from nanobot.utils.helpers import IncrementalThinkExtractor, image_placeholder_text
 from nanobot.utils.helpers import truncate_text as truncate_text_fn
 from nanobot.utils.image_generation_intent import image_generation_prompt
 from nanobot.utils.progress_events import (
@@ -101,22 +101,21 @@ class _LoopHook(AgentHook):
         self._metadata = metadata or {}
         self._session_key = session_key
         self._stream_buf = ""
-        self._emitted_thinking = ""
+        self._think_extractor = IncrementalThinkExtractor()
 
     def wants_streaming(self) -> bool:
         return self._on_stream is not None
 
     async def on_stream(self, context: AgentHookContext, delta: str) -> None:
-        from nanobot.utils.helpers import emit_incremental_think, strip_think
+        from nanobot.utils.helpers import strip_think
 
         prev_clean = strip_think(self._stream_buf)
         self._stream_buf += delta
         new_clean = strip_think(self._stream_buf)
         incremental = new_clean[len(prev_clean) :]
 
-        self._emitted_thinking = await emit_incremental_think(
-            self._stream_buf, self._emitted_thinking, self.emit_reasoning,
-        )
+        if await self._think_extractor.feed(self._stream_buf, self.emit_reasoning):
+            context.streamed_reasoning = True
 
         if incremental and self._on_stream:
             await self._on_stream(incremental)
@@ -125,7 +124,7 @@ class _LoopHook(AgentHook):
         if self._on_stream_end:
             await self._on_stream_end(resuming=resuming)
         self._stream_buf = ""
-        self._emitted_thinking = ""
+        self._think_extractor.reset()
 
     async def before_iteration(self, context: AgentHookContext) -> None:
         self._loop._current_iteration = context.iteration
diff --git a/nanobot/agent/runner.py b/nanobot/agent/runner.py
index 9a1cc6d65..2713359be 100644
--- a/nanobot/agent/runner.py
+++ b/nanobot/agent/runner.py
@@ -17,11 +17,11 @@ from nanobot.agent.tools.ask import AskUserInterrupt
 from nanobot.agent.tools.registry import ToolRegistry
 from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
 from nanobot.utils.helpers import (
+    IncrementalThinkExtractor,
     build_assistant_message,
-    emit_incremental_think,
     estimate_message_tokens,
     estimate_prompt_tokens_chain,
-    extract_think,
+    extract_reasoning,
     find_legal_message_start,
     maybe_persist_tool_result,
     strip_think,
@@ -284,24 +284,15 @@ class AgentRunner:
             context.tool_calls = list(response.tool_calls)
             self._accumulate_usage(usage, raw_usage)
 
-            if response.reasoning_content:
-                if not context.streamed_content:
-                    await hook.emit_reasoning(response.reasoning_content)
-                if response.content:
-                    response.content = strip_think(response.content)
-            elif response.thinking_blocks:
-                # Anthropic extended thinking: extract from thinking_blocks.
-                if not context.streamed_content:
-                    parts = [tb.get("thinking", "") for tb in response.thinking_blocks if tb.get("type") == "thinking"]
-                    if parts:
-                        await hook.emit_reasoning("\n\n".join(parts))
-            elif response.content:
-                inline_thinking, clean_content = extract_think(response.content)
-                if inline_thinking:
-                    # Only emit if streaming didn't already handle it.
-                    if not context.streamed_content:
-                        await hook.emit_reasoning(inline_thinking)
-                    response.content = clean_content
+            reasoning_text, cleaned_content = extract_reasoning(
+                response.reasoning_content,
+                response.thinking_blocks,
+                response.content,
+            )
+            response.content = cleaned_content
+            if reasoning_text and not context.streamed_reasoning:
+                await hook.emit_reasoning(reasoning_text)
+                context.streamed_reasoning = True
 
             if response.should_execute_tools:
                 tool_calls = list(response.tool_calls)
@@ -654,10 +645,10 @@ class AgentRunner:
             )
         elif wants_progress_streaming:
             stream_buf = ""
-            emitted_thinking = ""
+            think_extractor = IncrementalThinkExtractor()
 
             async def _stream_progress(delta: str) -> None:
-                nonlocal stream_buf, emitted_thinking
+                nonlocal stream_buf
                 if not delta:
                     return
                 prev_clean = strip_think(stream_buf)
@@ -665,9 +656,8 @@ class AgentRunner:
                 new_clean = strip_think(stream_buf)
                 incremental = new_clean[len(prev_clean):]
 
-                emitted_thinking = await emit_incremental_think(
-                    stream_buf, emitted_thinking, hook.emit_reasoning,
-                )
+                if await think_extractor.feed(stream_buf, hook.emit_reasoning):
+                    context.streamed_reasoning = True
 
                 if incremental:
                     context.streamed_content = True
diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py
index 1c835962a..467683ed9 100644
--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@@ -275,17 +275,17 @@ async def _maybe_print_interactive_progress(
 
     is_tool_hint = metadata.get("_tool_hint", False)
     is_reasoning = metadata.get("_reasoning", False)
+    if is_reasoning:
+        if channels_config and not channels_config.show_reasoning:
+            return True
+        _print_cli_reasoning(msg.content, thinking, renderer)
+        return True
     if channels_config and is_tool_hint and not channels_config.send_tool_hints:
         return True
     if channels_config and not is_tool_hint and not channels_config.send_progress:
         return True
-    if is_reasoning and channels_config and not channels_config.show_reasoning:
-        return True
 
-    if is_reasoning:
-        _print_cli_reasoning(msg.content, thinking, renderer)
-    else:
-        await _print_interactive_progress_line(msg.content, thinking, renderer)
+    await _print_interactive_progress_line(msg.content, thinking, renderer)
     return True
 
 
@@ -1147,16 +1147,16 @@ def agent(
     def _make_progress(renderer: StreamRenderer | None = None):
         async def _cli_progress(content: str, *, tool_hint: bool = False, reasoning: bool = False, **_kwargs: Any) -> None:
             ch = agent_loop.channels_config
+            if reasoning:
+                if ch and not ch.show_reasoning:
+                    return
+                _print_cli_reasoning(content, _thinking, renderer)
+                return
             if ch and tool_hint and not ch.send_tool_hints:
                 return
             if ch and not tool_hint and not ch.send_progress:
                 return
-            if reasoning and ch and not ch.show_reasoning:
-                return
-            if reasoning:
-                _print_cli_reasoning(content, _thinking, renderer)
-            else:
-                _print_cli_progress_line(content, _thinking, renderer)
+            _print_cli_progress_line(content, _thinking, renderer)
         return _cli_progress
 
     if message:
diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py
index 5301f4885..f348bc183 100644
--- a/nanobot/utils/helpers.py
+++ b/nanobot/utils/helpers.py
@@ -72,17 +72,11 @@ def strip_think(text: str) -> str:
 
 
 def extract_think(text: str) -> tuple[str | None, str]:
-    """Extract thinking/reasoning content from <think> and <thought> tags.
+    """Extract thinking content from inline ``<think>`` / ``<thought>`` blocks.
 
-    Returns (thinking_text, cleaned_text) where:
-      - thinking_text: concatenated content from all <think>...</think> and
-        <thought>...</thought> blocks, or None if none found.
-      - cleaned_text: the input with all thinking blocks removed (same as
-        strip_think()).
-
-    Only extracts from well-formed closed blocks. Unclosed trailing tags
-    (common during streaming) are stripped without extraction — use
-    strip_think() for pure streaming cleanup.
+    Returns ``(thinking_text, cleaned_text)``. Only closed blocks are
+    extracted; unclosed streaming prefixes are stripped from the cleaned
+    text but not surfaced — :func:`strip_think` handles that case.
     """
     parts: list[str] = []
     for m in re.finditer(r"<think>([\s\S]*?)</think>", text):
@@ -93,23 +87,75 @@ def extract_think(text: str) -> tuple[str | None, str]:
     return thinking, strip_think(text)
 
 
-async def emit_incremental_think(
-    buf: str,
-    emitted: str,
-    emit_fn: Any,
-) -> str:
-    """Extract new thinking from buf and emit if not yet emitted.
+class IncrementalThinkExtractor:
+    """Stateful inline ``<think>`` extractor for streaming buffers.
 
-    Returns the updated emitted state.  *emit_fn* is an async callable
-    that accepts a single reasoning string (e.g. ``hook.emit_reasoning``).
+    Streaming providers expose only a single content delta channel. When a
+    model embeds reasoning in ``<think>...</think>`` blocks inside that
+    channel, callers need to surface the reasoning incrementally as it
+    arrives without re-emitting earlier text. This holds the "already
+    emitted" cursor so the runner and the loop hook share one shape.
     """
-    thinking, _ = extract_think(buf)
-    if thinking and thinking != emitted:
-        new = thinking[len(emitted):]
-        if new.strip():
-            await emit_fn(new.strip())
-        return thinking
-    return emitted
+
+    __slots__ = ("_emitted",)
+
+    def __init__(self) -> None:
+        self._emitted = ""
+
+    def reset(self) -> None:
+        self._emitted = ""
+
+    async def feed(self, buf: str, emit: Any) -> bool:
+        """Emit any new thinking text found in ``buf``.
+
+        Returns True if anything was emitted this call. ``emit`` is an
+        async callable taking a single string (typically
+        ``hook.emit_reasoning``).
+        """
+        thinking, _ = extract_think(buf)
+        if not thinking or thinking == self._emitted:
+            return False
+        new = thinking[len(self._emitted):].strip()
+        self._emitted = thinking
+        if not new:
+            return False
+        await emit(new)
+        return True
+
+
+def extract_reasoning(
+    reasoning_content: str | None,
+    thinking_blocks: list[dict[str, Any]] | None,
+    content: str | None,
+) -> tuple[str | None, str | None]:
+    """Return ``(reasoning_text, cleaned_content)`` from one model response.
+
+    Single source of truth for "what reasoning did this response carry, and
+    what answer text remains after we peel it out". Fallback order:
+
+    1. Dedicated ``reasoning_content`` (DeepSeek-R1, Kimi, MiMo, OpenAI
+       reasoning models, Bedrock).
+    2. Anthropic ``thinking_blocks``.
+    3. Inline ``<think>`` / ``<thought>`` blocks in ``content``.
+
+    Only one source contributes per response; lower-priority sources are
+    ignored if a higher-priority one is present, but inline ``<think>``
+    tags are still stripped from ``content`` so they never leak into the
+    final answer.
+    """
+    if reasoning_content:
+        return reasoning_content, strip_think(content) if content else content
+    if thinking_blocks:
+        parts = [
+            tb.get("thinking", "")
+            for tb in thinking_blocks
+            if isinstance(tb, dict) and tb.get("type") == "thinking"
+        ]
+        joined = "\n\n".join(p for p in parts if p)
+        return (joined or None), strip_think(content) if content else content
+    if content:
+        return extract_think(content)
+    return None, content
 
 
 def detect_image_mime(data: bytes) -> str | None:
diff --git a/tests/agent/test_runner.py b/tests/agent/test_runner.py
index 850e3caea..d50b82cd4 100644
--- a/tests/agent/test_runner.py
+++ b/tests/agent/test_runner.py
@@ -227,6 +227,111 @@ async def test_runner_prefers_reasoning_content_over_inline_think():
     assert emitted_reasoning[0] == "dedicated reasoning field"
 
 
+@pytest.mark.asyncio
+async def test_runner_emits_reasoning_content_even_when_answer_was_streamed():
+    """`reasoning_content` arrives only on the final response; streaming the
+    answer must not suppress it (the answer stream and the reasoning channel
+    are independent — only the reasoning-already-emitted bit matters)."""
+    from nanobot.agent.hook import AgentHook, AgentHookContext
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock()
+    provider.supports_progress_deltas = True
+    emitted_reasoning: list[str] = []
+
+    async def chat_stream_with_retry(*, on_content_delta=None, **kwargs):
+        if on_content_delta:
+            await on_content_delta("The ")
+            await on_content_delta("answer.")
+        return LLMResponse(
+            content="The answer.",
+            reasoning_content="step-by-step deduction",
+            tool_calls=[],
+            usage={"prompt_tokens": 5, "completion_tokens": 3},
+        )
+
+    provider.chat_stream_with_retry = chat_stream_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    class ReasoningHook(AgentHook):
+        async def emit_reasoning(self, reasoning_content: str | None) -> None:
+            if reasoning_content:
+                emitted_reasoning.append(reasoning_content)
+
+    progress_calls: list[str] = []
+
+    async def _progress(content: str, **_kwargs):
+        progress_calls.append(content)
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "question"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=3,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        hook=ReasoningHook(),
+        stream_progress_deltas=True,
+        progress_callback=_progress,
+    ))
+
+    assert result.final_content == "The answer."
+    # The answer must have streamed AND the dedicated reasoning_content must
+    # have been emitted exactly once after the stream completed.
+    assert progress_calls, "answer should have streamed via progress callback"
+    assert emitted_reasoning == ["step-by-step deduction"]
+
+
+@pytest.mark.asyncio
+async def test_runner_does_not_double_emit_when_inline_think_already_streamed():
+    """Inline `<think>` blocks streamed incrementally during the answer
+    stream must not be re-emitted from the final response."""
+    from nanobot.agent.hook import AgentHook, AgentHookContext
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock()
+    provider.supports_progress_deltas = True
+    emitted_reasoning: list[str] = []
+
+    async def chat_stream_with_retry(*, on_content_delta=None, **kwargs):
+        if on_content_delta:
+            await on_content_delta("<think>working...</think>")
+            await on_content_delta("The answer.")
+        return LLMResponse(
+            content="<think>working...</think>The answer.",
+            tool_calls=[],
+            usage={"prompt_tokens": 5, "completion_tokens": 3},
+        )
+
+    provider.chat_stream_with_retry = chat_stream_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    class ReasoningHook(AgentHook):
+        async def emit_reasoning(self, reasoning_content: str | None) -> None:
+            if reasoning_content:
+                emitted_reasoning.append(reasoning_content)
+
+    async def _progress(content: str, **_kwargs):
+        pass
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "question"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=3,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        hook=ReasoningHook(),
+        stream_progress_deltas=True,
+        progress_callback=_progress,
+    ))
+
+    assert result.final_content == "The answer."
+    assert emitted_reasoning == ["working..."]
+
+
 @pytest.mark.asyncio
 async def test_runner_calls_hooks_in_order():
     from nanobot.agent.hook import AgentHook, AgentHookContext
diff --git a/tests/cli/test_interactive_retry_wait.py b/tests/cli/test_interactive_retry_wait.py
index e693b057c..7ddef1c48 100644
--- a/tests/cli/test_interactive_retry_wait.py
+++ b/tests/cli/test_interactive_retry_wait.py
@@ -88,3 +88,26 @@ async def test_non_reasoning_progress_not_affected_by_show_reasoning():
 
     assert handled is True
     assert calls == ["working on it..."]
+
+
+@pytest.mark.asyncio
+async def test_reasoning_shown_when_send_progress_disabled():
+    """Reasoning display is governed by `show_reasoning` alone, independent
+    of `send_progress` — the two knobs are orthogonal."""
+    calls: list[str] = []
+    channels_config = SimpleNamespace(
+        send_progress=False, send_tool_hints=False, show_reasoning=True,
+    )
+    msg = SimpleNamespace(
+        content="Let me think about this...",
+        metadata={"_progress": True, "_reasoning": True},
+    )
+
+    with patch(
+        "nanobot.cli.commands._print_cli_reasoning",
+        side_effect=lambda t, th, r=None: calls.append(t),
+    ):
+        handled = await commands._maybe_print_interactive_progress(msg, None, channels_config)
+
+    assert handled is True
+    assert calls == ["Let me think about this..."]
diff --git a/tests/utils/test_strip_think.py b/tests/utils/test_strip_think.py
index 65d952ad1..f1048f40c 100644
--- a/tests/utils/test_strip_think.py
+++ b/tests/utils/test_strip_think.py
@@ -1,4 +1,4 @@
-from nanobot.utils.helpers import extract_think, strip_think
+from nanobot.utils.helpers import extract_reasoning, extract_think, strip_think
 
 
 class TestStripThinkTag:
@@ -225,3 +225,49 @@ squares = [x**2 for x in range(10)]
         assert "List comprehensions in Python" in clean
         assert "<think>" not in clean
         assert "</think>" not in clean
+
+
+class TestExtractReasoning:
+    """Single source of truth for reasoning extraction across all providers."""
+
+    def test_prefers_reasoning_content_and_strips_inline_think(self):
+        # Dedicated field wins; inline tags are still scrubbed from content.
+        reasoning, content = extract_reasoning(
+            "dedicated",
+            None,
+            "<think>inline</think>visible answer",
+        )
+        assert reasoning == "dedicated"
+        assert content == "visible answer"
+
+    def test_falls_back_to_thinking_blocks(self):
+        reasoning, content = extract_reasoning(
+            None,
+            [
+                {"type": "thinking", "thinking": "step 1"},
+                {"type": "thinking", "thinking": "step 2"},
+                {"type": "redacted_thinking"},
+            ],
+            "hello",
+        )
+        assert reasoning == "step 1\n\nstep 2"
+        assert content == "hello"
+
+    def test_falls_back_to_inline_think_tags(self):
+        reasoning, content = extract_reasoning(
+            None, None, "<think>plan</think>answer"
+        )
+        assert reasoning == "plan"
+        assert content == "answer"
+
+    def test_no_reasoning_returns_none(self):
+        reasoning, content = extract_reasoning(None, None, "plain answer")
+        assert reasoning is None
+        assert content == "plain answer"
+
+    def test_empty_thinking_blocks_falls_through_to_inline(self):
+        reasoning, content = extract_reasoning(
+            None, [], "<think>plan</think>answer"
+        )
+        assert reasoning == "plan"
+        assert content == "answer"

From a6b059d37924059eef322261fcaa8340a6528fa4 Mon Sep 17 00:00:00 2001
From: Xubin Ren <xubinrencs@gmail.com>
Date: Wed, 13 May 2026 06:27:53 +0000
Subject: [PATCH 05/17] refactor(reasoning): make channel plugins own reasoning
 rendering
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reasoning was being shipped to every channel as a generic progress
message with a `_reasoning: true` flag. Two problems with that:

1. Channels without a low-emphasis UI primitive (Telegram, Slack,
   Discord, Feishu...) would dump raw model thoughts as ordinary
   replies, polluting the conversation.
2. The agent loop double-gated by inspecting `channels_config`, which
   coupled the loop to display policy.

Treat reasoning as its own plugin action — `BaseChannel.send_reasoning`
defaults to a documented no-op; channels that have a fitting affordance
override. ChannelManager routes `_reasoning` outbounds to that method
only when the channel opts in via `show_reasoning` (camelCase alias
`showReasoning` mirrors `sendProgress`). Plugins that don't override
silently drop reasoning — "no fit, no leak" is the contract.

Reference implementation lands for WebSocket / WebUI: a new
`kind: "reasoning"` frame, parked on the active assistant bubble as a
collapsible `Thinking` group above the answer. CLI keeps its existing
direct path (it doesn't go through the bus). `ChannelsConfig.show_reasoning`
flips to `true` by default — only adapted channels surface anything,
others stay quiet.

Loop logic shrinks by 3 lines (the docstring grows instead): the
`channels_config.show_reasoning` check moves out, leaving emit_reasoning
a one-liner that publishes and trusts the channel to decide.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 docs/configuration.md                         |   2 +-
 nanobot/agent/loop.py                         |  11 +-
 nanobot/channels/base.py                      |  13 ++
 nanobot/channels/manager.py                   |  20 +-
 nanobot/channels/websocket.py                 |  24 +++
 nanobot/config/schema.py                      |   2 +-
 .../test_channel_manager_reasoning.py         | 183 ++++++++++++++++++
 tests/channels/test_websocket_channel.py      |  54 ++++++
 webui/src/components/MessageBubble.tsx        |  60 +++++-
 webui/src/hooks/useNanobotStream.ts           |  35 +++-
 webui/src/i18n/locales/en/common.json         |   1 +
 webui/src/i18n/locales/zh-CN/common.json      |   1 +
 webui/src/lib/types.ts                        |   6 +-
 webui/src/tests/message-bubble.test.tsx       |  33 ++++
 webui/src/tests/useNanobotStream.test.tsx     |  72 +++++++
 15 files changed, 504 insertions(+), 13 deletions(-)
 create mode 100644 tests/channels/test_channel_manager_reasoning.py

diff --git a/docs/configuration.md b/docs/configuration.md
index 85091d1f7..ed5a534cf 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -743,7 +743,7 @@ Global settings that apply to all channels. Configure under the `channels` secti
 |---------|---------|-------------|
 | `sendProgress` | `true` | Stream agent's text progress to the channel |
 | `sendToolHints` | `false` | Stream tool-call hints (e.g. `read_file("…")`) |
-| `showReasoning` | `false` | Surface model reasoning/thinking content (DeepSeek-R1 `reasoning_content`, Anthropic `thinking_blocks`, inline `<think>` tags). Independent of `sendProgress`. |
+| `showReasoning` | `true` | Allow channels to surface model reasoning/thinking content (DeepSeek-R1 `reasoning_content`, Anthropic `thinking_blocks`, inline `<think>` tags). The setting is a plugin opt-in: even when `true`, a channel only renders reasoning if it overrides `send_reasoning()`. Currently surfaced on CLI and WebSocket/WebUI; other channels (Telegram, Slack, Discord, ...) keep it as a silent no-op until their bubble UI is adapted. Independent of `sendProgress`. |
 | `sendMaxRetries` | `3` | Max delivery attempts per outbound message, including the initial send (0-10 configured, minimum 1 actual attempt) |
 | `transcriptionProvider` | `"groq"` | Voice transcription backend: `"groq"` (free tier, default) or `"openai"`. API key is auto-resolved from the matching provider config. |
 | `transcriptionLanguage` | `null` | Optional ISO-639-1 language hint for audio transcription, e.g. `"en"`, `"ko"`, `"ja"`. |
diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index c7091a5f6..e7b045f01 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -147,10 +147,13 @@ class _LoopHook(AgentHook):
         )
 
     async def emit_reasoning(self, reasoning_content: str | None) -> None:
-        """Send reasoning/thinking content as progress before the main answer."""
-        ch = self._loop.channels_config
-        if not ch or not ch.show_reasoning:
-            return
+        """Publish reasoning content; channel plugins decide whether to render.
+
+        The loop is intentionally not the gate: ``ChannelsConfig.show_reasoning``
+        is a default that ``ChannelManager`` resolves per channel and checks
+        before calling ``send_reasoning``. A channel without a low-emphasis UI
+        primitive keeps the base no-op and the content drops at dispatch.
+        """
         if self._on_progress and reasoning_content:
             await self._on_progress(reasoning_content, reasoning=True)
 
diff --git a/nanobot/channels/base.py b/nanobot/channels/base.py
index 087677494..c82003d88 100644
--- a/nanobot/channels/base.py
+++ b/nanobot/channels/base.py
@@ -28,6 +28,7 @@ class BaseChannel(ABC):
     transcription_language: str | None = None
     send_progress: bool = True
     send_tool_hints: bool = False
+    show_reasoning: bool = True
 
     def __init__(self, config: Any, bus: MessageBus):
         """
@@ -120,6 +121,18 @@ class BaseChannel(ABC):
         """
         pass
 
+    async def send_reasoning(self, msg: OutboundMessage) -> None:
+        """Surface model reasoning/thinking content.
+
+        Default is no-op. Channels with a native low-emphasis primitive
+        (Slack context block, Telegram expandable blockquote, Discord
+        subtext, WebUI italic bubble, ...) override to render reasoning
+        as a subordinate trace. Channels without a suitable affordance
+        keep this no-op: silently dropping is better than leaking raw
+        model thoughts as regular conversational messages.
+        """
+        return
+
     @property
     def supports_streaming(self) -> bool:
         """True when config enables streaming AND this subclass implements send_delta."""
diff --git a/nanobot/channels/manager.py b/nanobot/channels/manager.py
index 1d92bb879..abf9bf043 100644
--- a/nanobot/channels/manager.py
+++ b/nanobot/channels/manager.py
@@ -36,6 +36,7 @@ _SEND_RETRY_DELAYS = (1, 2, 4)
 _BOOL_CAMEL_ALIASES: dict[str, str] = {
     "send_progress": "sendProgress",
     "send_tool_hints": "sendToolHints",
+    "show_reasoning": "showReasoning",
 }
 
 class ChannelManager:
@@ -104,6 +105,9 @@ class ChannelManager:
                 channel.send_tool_hints = self._resolve_bool_override(
                     section, "send_tool_hints", self.config.channels.send_tool_hints,
                 )
+                channel.show_reasoning = self._resolve_bool_override(
+                    section, "show_reasoning", self.config.channels.show_reasoning,
+                )
                 self.channels[name] = channel
                 logger.info("{} channel enabled", cls.display_name)
             except Exception as e:
@@ -279,6 +283,18 @@ class ChannelManager:
                         timeout=1.0
                     )
 
+                if msg.metadata.get("_reasoning"):
+                    # Reasoning rides its own plugin channel: only delivered when
+                    # the destination channel both opts in (``show_reasoning``)
+                    # and overrides ``send_reasoning``. Channels without a
+                    # low-emphasis UI primitive keep the base no-op and the
+                    # content silently drops here rather than leak as a
+                    # conversational reply.
+                    channel = self.channels.get(msg.channel)
+                    if channel is not None and channel.show_reasoning:
+                        await self._send_with_retry(channel, msg)
+                    continue
+
                 if msg.metadata.get("_progress"):
                     if msg.metadata.get("_tool_hint") and not self._should_send_progress(
                         msg.channel, tool_hint=True,
@@ -329,7 +345,9 @@ class ChannelManager:
     @staticmethod
     async def _send_once(channel: BaseChannel, msg: OutboundMessage) -> None:
         """Send one outbound message without retry policy."""
-        if msg.metadata.get("_stream_delta") or msg.metadata.get("_stream_end"):
+        if msg.metadata.get("_reasoning"):
+            await channel.send_reasoning(msg)
+        elif msg.metadata.get("_stream_delta") or msg.metadata.get("_stream_end"):
             await channel.send_delta(msg.chat_id, msg.content, msg.metadata)
         elif not msg.metadata.get("_streamed"):
             await channel.send(msg)
diff --git a/nanobot/channels/websocket.py b/nanobot/channels/websocket.py
index 76ca513d0..bba68397f 100644
--- a/nanobot/channels/websocket.py
+++ b/nanobot/channels/websocket.py
@@ -1487,6 +1487,30 @@ class WebSocketChannel(BaseChannel):
         for connection in conns:
             await self._safe_send_to(connection, raw, label=" ")
 
+    async def send_reasoning(self, msg: OutboundMessage) -> None:
+        """Stream model reasoning as a subordinate trace frame.
+
+        Renders as ``kind=reasoning`` alongside the existing ``tool_hint`` /
+        ``progress`` frames; the WebUI mounts these on the active assistant
+        bubble rather than as a conversational reply.
+        """
+        conns = list(self._subs.get(msg.chat_id, ()))
+        if not conns:
+            return
+        if not msg.content:
+            return
+        payload: dict[str, Any] = {
+            "event": "message",
+            "chat_id": msg.chat_id,
+            "text": msg.content,
+            "kind": "reasoning",
+        }
+        if msg.reply_to:
+            payload["reply_to"] = msg.reply_to
+        raw = json.dumps(payload, ensure_ascii=False)
+        for connection in conns:
+            await self._safe_send_to(connection, raw, label=" reasoning ")
+
     async def send_delta(
         self,
         chat_id: str,
diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py
index 72110eedd..ff7454d71 100644
--- a/nanobot/config/schema.py
+++ b/nanobot/config/schema.py
@@ -35,7 +35,7 @@ class ChannelsConfig(Base):
 
     send_progress: bool = True  # stream agent's text progress to the channel
     send_tool_hints: bool = False  # stream tool-call hints (e.g. read_file("…"))
-    show_reasoning: bool = False  # show model reasoning/thinking content
+    show_reasoning: bool = True  # surface model reasoning when channel implements it
     send_max_retries: int = Field(default=3, ge=0, le=10)  # Max delivery attempts (initial send included)
     transcription_provider: str = "groq"  # Voice transcription backend: "groq" or "openai"
     transcription_language: str | None = Field(default=None, pattern=r"^[a-z]{2,3}$")  # Optional ISO-639-1 hint for audio transcription
diff --git a/tests/channels/test_channel_manager_reasoning.py b/tests/channels/test_channel_manager_reasoning.py
new file mode 100644
index 000000000..2200f4be2
--- /dev/null
+++ b/tests/channels/test_channel_manager_reasoning.py
@@ -0,0 +1,183 @@
+"""Tests for ChannelManager routing of model reasoning content.
+
+Reasoning is delivered as a separate plugin action (``send_reasoning``)
+rather than a metadata flag on a regular outbound. The manager routes
+``_reasoning`` messages only to channels that opt in via
+``channel.show_reasoning``; channels without a low-emphasis UI primitive
+keep the base no-op and the content silently drops at dispatch.
+"""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock
+
+import pytest
+
+from nanobot.bus.events import OutboundMessage
+from nanobot.bus.queue import MessageBus
+from nanobot.channels.base import BaseChannel
+from nanobot.channels.manager import ChannelManager
+from nanobot.config.schema import Config
+
+
+class _MockChannel(BaseChannel):
+    name = "mock"
+    display_name = "Mock"
+
+    def __init__(self, config, bus):
+        super().__init__(config, bus)
+        self._send_mock = AsyncMock()
+        self._send_reasoning_mock = AsyncMock()
+
+    async def start(self):  # pragma: no cover - not exercised
+        pass
+
+    async def stop(self):  # pragma: no cover - not exercised
+        pass
+
+    async def send(self, msg):
+        return await self._send_mock(msg)
+
+    async def send_reasoning(self, msg):
+        return await self._send_reasoning_mock(msg)
+
+
+@pytest.fixture
+def manager() -> ChannelManager:
+    mgr = ChannelManager(Config(), MessageBus())
+    mgr.channels["mock"] = _MockChannel({}, mgr.bus)
+    return mgr
+
+
+@pytest.mark.asyncio
+async def test_reasoning_routes_to_send_reasoning_not_send(manager):
+    channel = manager.channels["mock"]
+    msg = OutboundMessage(
+        channel="mock",
+        chat_id="c1",
+        content="step-by-step thinking",
+        metadata={"_progress": True, "_reasoning": True},
+    )
+    await manager._send_once(channel, msg)
+    channel._send_reasoning_mock.assert_awaited_once_with(msg)
+    channel._send_mock.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_dispatch_drops_reasoning_when_channel_opts_out(manager):
+    channel = manager.channels["mock"]
+    channel.show_reasoning = False
+    msg = OutboundMessage(
+        channel="mock",
+        chat_id="c1",
+        content="hidden thinking",
+        metadata={"_progress": True, "_reasoning": True},
+    )
+    await manager.bus.publish_outbound(msg)
+
+    pumped = await _pump_one(manager)
+
+    assert pumped is True
+    channel._send_reasoning_mock.assert_not_awaited()
+    channel._send_mock.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_dispatch_delivers_reasoning_when_channel_opts_in(manager):
+    channel = manager.channels["mock"]
+    channel.show_reasoning = True
+    msg = OutboundMessage(
+        channel="mock",
+        chat_id="c1",
+        content="visible thinking",
+        metadata={"_progress": True, "_reasoning": True},
+    )
+    await manager.bus.publish_outbound(msg)
+
+    pumped = await _pump_one(manager)
+
+    assert pumped is True
+    channel._send_reasoning_mock.assert_awaited_once()
+    delivered = channel._send_reasoning_mock.await_args.args[0]
+    assert delivered.content == "visible thinking"
+
+
+@pytest.mark.asyncio
+async def test_dispatch_silently_drops_reasoning_for_unknown_channel(manager):
+    msg = OutboundMessage(
+        channel="ghost",
+        chat_id="c1",
+        content="nobody home",
+        metadata={"_progress": True, "_reasoning": True},
+    )
+    await manager.bus.publish_outbound(msg)
+
+    pumped = await _pump_one(manager)
+
+    assert pumped is True
+    # Mock channel must not receive anything destined for a different channel.
+    manager.channels["mock"]._send_reasoning_mock.assert_not_awaited()
+    manager.channels["mock"]._send_mock.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_base_channel_send_reasoning_is_noop_safe():
+    """Plugins that don't override `send_reasoning` must not blow up."""
+
+    class _Plain(BaseChannel):
+        name = "plain"
+        display_name = "Plain"
+
+        async def start(self):  # pragma: no cover
+            pass
+
+        async def stop(self):  # pragma: no cover
+            pass
+
+        async def send(self, msg):  # pragma: no cover
+            pass
+
+    channel = _Plain({}, MessageBus())
+    # No exception, returns None.
+    assert await channel.send_reasoning(
+        OutboundMessage(channel="plain", chat_id="c", content="x", metadata={})
+    ) is None
+
+
+@pytest.mark.asyncio
+async def test_reasoning_routing_does_not_consult_send_progress(manager):
+    """`show_reasoning` is orthogonal to `send_progress` — turning off
+    progress streaming must not silence reasoning."""
+    channel = manager.channels["mock"]
+    channel.send_progress = False
+    channel.show_reasoning = True
+    msg = OutboundMessage(
+        channel="mock",
+        chat_id="c1",
+        content="still surfaces",
+        metadata={"_progress": True, "_reasoning": True},
+    )
+    await manager.bus.publish_outbound(msg)
+
+    pumped = await _pump_one(manager)
+
+    assert pumped is True
+    channel._send_reasoning_mock.assert_awaited_once()
+
+
+async def _pump_one(manager: ChannelManager) -> bool:
+    """Run the dispatcher until the outbound queue drains; return whether it did."""
+    import asyncio
+
+    task = asyncio.create_task(manager._dispatch_outbound())
+    for _ in range(50):  # poll for up to ~0.5s so a stuck dispatcher cannot hang the test
+        await asyncio.sleep(0.01)
+        if manager.bus.outbound.qsize() == 0:
+            break
+    drained = manager.bus.outbound.qsize() == 0  # False => the message was never consumed
+    task.cancel()
+    try:
+        await task
+    except asyncio.CancelledError:
+        pass
+    return drained
diff --git a/tests/channels/test_websocket_channel.py b/tests/channels/test_websocket_channel.py
index 92b61f7d6..0e682ed0a 100644
--- a/tests/channels/test_websocket_channel.py
+++ b/tests/channels/test_websocket_channel.py
@@ -358,6 +358,60 @@ async def test_send_delta_emits_delta_and_stream_end() -> None:
     assert second["stream_id"] == "sid"
 
 
+@pytest.mark.asyncio
+async def test_send_reasoning_emits_reasoning_kind_frame() -> None:
+    bus = MagicMock()
+    channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus)
+    mock_ws = AsyncMock()
+    channel._attach(mock_ws, "chat-1")
+
+    await channel.send_reasoning(OutboundMessage(
+        channel="websocket",
+        chat_id="chat-1",
+        content="step-by-step thinking",
+        metadata={"_progress": True, "_reasoning": True},
+    ))
+
+    mock_ws.send.assert_awaited_once()
+    payload = json.loads(mock_ws.send.await_args.args[0])
+    assert payload["event"] == "message"
+    assert payload["chat_id"] == "chat-1"
+    assert payload["text"] == "step-by-step thinking"
+    assert payload["kind"] == "reasoning"
+
+
+@pytest.mark.asyncio
+async def test_send_reasoning_drops_empty_content() -> None:
+    """Empty reasoning emits nothing — keeps the frontend bubble clean."""
+    bus = MagicMock()
+    channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus)
+    mock_ws = AsyncMock()
+    channel._attach(mock_ws, "chat-1")
+
+    await channel.send_reasoning(OutboundMessage(
+        channel="websocket",
+        chat_id="chat-1",
+        content="",
+        metadata={"_reasoning": True},
+    ))
+
+    mock_ws.send.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_send_reasoning_without_subscribers_is_noop() -> None:
+    bus = MagicMock()
+    channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus)
+
+    await channel.send_reasoning(OutboundMessage(
+        channel="websocket",
+        chat_id="unattached",
+        content="thinking",
+        metadata={"_reasoning": True},
+    ))
+    # No subscribers, no exception, no send.
+
+
 @pytest.mark.asyncio
 async def test_send_turn_end_emits_turn_end_event() -> None:
     bus = MagicMock()
diff --git a/webui/src/components/MessageBubble.tsx b/webui/src/components/MessageBubble.tsx
index 3bd580567..556460824 100644
--- a/webui/src/components/MessageBubble.tsx
+++ b/webui/src/components/MessageBubble.tsx
@@ -1,5 +1,5 @@
-import { useCallback, useEffect, useRef, useState } from "react";
-import { Check, ChevronRight, Copy, FileIcon, ImageIcon, PlaySquare, Wrench } from "lucide-react";
+import { useCallback, useEffect, useMemo, useRef, useState } from "react";
+import { Check, ChevronRight, Copy, FileIcon, ImageIcon, PlaySquare, Sparkles, Wrench } from "lucide-react";
 import { useTranslation } from "react-i18next";
 
 import { ImageLightbox } from "@/components/ImageLightbox";
@@ -85,12 +85,14 @@ export function MessageBubble({ message }: MessageBubbleProps) {
 
   const empty = message.content.trim().length === 0;
   const media = message.media ?? [];
+  const reasoning = message.role === "assistant" ? message.reasoning ?? [] : [];
   const showAssistantActions = message.role === "assistant" && !message.isStreaming && !empty;
   return (
     <div className={cn("w-full text-[15px]", baseAnim)} style={{ lineHeight: "var(--cjk-line-height)" }}>
-      {empty && message.isStreaming ? (
+      {reasoning.length > 0 ? <ReasoningBubble lines={reasoning} /> : null}
+      {empty && message.isStreaming && reasoning.length === 0 ? (
         <TypingDots />
-      ) : (
+      ) : empty && message.isStreaming ? null : (
         <>
           <MarkdownText>{message.content}</MarkdownText>
           {message.isStreaming && <StreamCursor />}
@@ -433,3 +435,53 @@ function TraceGroup({ message, animClass }: TraceGroupProps) {
     </div>
   );
 }
+
+interface ReasoningBubbleProps {
+  lines: string[];
+}
+
+/**
+ * Subordinate "thinking" trace shown above an assistant turn. Mirrors the
+ * CLI's italic dim ``ChevronRight`` row visually; collapsible because
+ * reasoning from models like DeepSeek-R1 / o-series can run long. Starts
+ * expanded (so the user sees the model "thinking out loud" while the answer
+ * streams); after that the user's toggle state persists across rerenders.
+ */
+function ReasoningBubble({ lines }: ReasoningBubbleProps) {
+  const { t } = useTranslation();
+  const [open, setOpen] = useState(true);
+  const text = useMemo(() => lines.join("\n\n"), [lines]);
+  return (
+    <div className="mb-2 w-full animate-in fade-in-0 slide-in-from-top-1 duration-200">
+      <button
+        type="button"
+        onClick={() => setOpen((v) => !v)}
+        className={cn(
+          "flex w-full items-center gap-2 rounded-md px-2 py-1.5",
+          "text-xs text-muted-foreground transition-colors hover:bg-muted/45",
+        )}
+        aria-expanded={open}
+      >
+        <Sparkles className="h-3.5 w-3.5" aria-hidden />
+        <span className="font-medium">{t("message.reasoning", { defaultValue: "Thinking" })}</span>
+        <ChevronRight
+          aria-hidden
+          className={cn(
+            "ml-auto h-3.5 w-3.5 transition-transform duration-200",
+            open && "rotate-90",
+          )}
+        />
+      </button>
+      {open && (
+        <div
+          className={cn(
+            "mt-1 whitespace-pre-wrap break-words border-l border-muted-foreground/20 pl-3",
+            "text-[12.5px] italic leading-relaxed text-muted-foreground/85",
+          )}
+        >
+          {text}
+        </div>
+      )}
+    </div>
+  );
+}
diff --git a/webui/src/hooks/useNanobotStream.ts b/webui/src/hooks/useNanobotStream.ts
index 8ec1a9ac4..ee460cf56 100644
--- a/webui/src/hooks/useNanobotStream.ts
+++ b/webui/src/hooks/useNanobotStream.ts
@@ -183,10 +183,43 @@ export function useNanobotStream(
       if (ev.event === "message") {
         if (
           suppressStreamUntilTurnEndRef.current &&
-          (ev.kind === "tool_hint" || ev.kind === "progress")
+          (ev.kind === "tool_hint" || ev.kind === "progress" || ev.kind === "reasoning")
         ) {
           return;
         }
+        // Model reasoning rides its own channel: stash it on the next
+        // assistant turn so the bubble renders it as a subordinate trace.
+        // If the assistant message hasn't materialized yet (typical, since
+        // reasoning fires before tool calls/answers), park it on a sentinel
+        // pending row that the next assistant message absorbs.
+        if (ev.kind === "reasoning") {
+          const line = ev.text;
+          if (!line) return;
+          setMessages((prev) => {
+            for (let i = prev.length - 1; i >= 0; i -= 1) {
+              const candidate = prev[i];
+              if (candidate.role === "assistant" && candidate.kind !== "trace") {
+                const merged: UIMessage = {
+                  ...candidate,
+                  reasoning: [...(candidate.reasoning ?? []), line],
+                };
+                return [...prev.slice(0, i), merged, ...prev.slice(i + 1)];
+              }
+            }
+            return [
+              ...prev,
+              {
+                id: crypto.randomUUID(),
+                role: "assistant",
+                content: "",
+                isStreaming: true,
+                reasoning: [line],
+                createdAt: Date.now(),
+              },
+            ];
+          });
+          return;
+        }
         // Intermediate agent breadcrumbs (tool-call hints, raw progress).
         // Attach them to the last trace row if it was the last emitted item
         // so a sequence of calls collapses into one compact trace group.
diff --git a/webui/src/i18n/locales/en/common.json b/webui/src/i18n/locales/en/common.json
index 4cf1b6391..1f6eb7b54 100644
--- a/webui/src/i18n/locales/en/common.json
+++ b/webui/src/i18n/locales/en/common.json
@@ -332,6 +332,7 @@
     "assistantTyping": "Assistant is typing",
     "toolSingle": "Using a tool",
     "toolMany": "Used {{count}} tools",
+    "reasoning": "Thinking",
     "imageAttachment": "Image attachment",
     "copyReply": "Copy reply",
     "copiedReply": "Copied reply"
diff --git a/webui/src/i18n/locales/zh-CN/common.json b/webui/src/i18n/locales/zh-CN/common.json
index fed932f29..662a5f7bd 100644
--- a/webui/src/i18n/locales/zh-CN/common.json
+++ b/webui/src/i18n/locales/zh-CN/common.json
@@ -320,6 +320,7 @@
     "assistantTyping": "助手正在输入",
     "toolSingle": "正在使用工具",
     "toolMany": "已使用 {{count}} 个工具",
+    "reasoning": "思考中",
     "imageAttachment": "图片附件",
     "copyReply": "复制回复",
     "copiedReply": "已复制回复"
diff --git a/webui/src/lib/types.ts b/webui/src/lib/types.ts
index 5e7dc9288..0338b75f3 100644
--- a/webui/src/lib/types.ts
+++ b/webui/src/lib/types.ts
@@ -44,6 +44,10 @@ export interface UIMessage {
   images?: UIImage[];
   /** Signed or local UI-renderable media attachments. */
   media?: UIMediaAttachment[];
+  /** Assistant turn: model reasoning / thinking content collected from
+   * `kind: "reasoning"` frames. Each entry is one emit cycle, joined with
+   * blank lines on render. */
+  reasoning?: string[];
 }
 
 export interface ChatSummary {
@@ -141,7 +145,7 @@ export type InboundEvent =
       media_urls?: Array<{ url: string; name?: string }>;
       /** Present when the frame is an agent breadcrumb (e.g. tool hint,
        * generic progress line) rather than a conversational reply. */
-      kind?: "tool_hint" | "progress";
+      kind?: "tool_hint" | "progress" | "reasoning";
     }
   | {
       event: "delta";
diff --git a/webui/src/tests/message-bubble.test.tsx b/webui/src/tests/message-bubble.test.tsx
index 35cdaed40..77608b121 100644
--- a/webui/src/tests/message-bubble.test.tsx
+++ b/webui/src/tests/message-bubble.test.tsx
@@ -103,6 +103,39 @@ describe("MessageBubble", () => {
     expect(container.querySelector("video[controls]")).toBeInTheDocument();
   });
 
+  it("surfaces reasoning content above the assistant answer when provided", () => {
+    const message: UIMessage = {
+      id: "a-reasoning",
+      role: "assistant",
+      content: "The answer is 42.",
+      createdAt: Date.now(),
+      reasoning: ["Step 1: parse intent.", "Step 2: compute."],
+    };
+
+    render(<MessageBubble message={message} />);
+
+    expect(screen.getByText("Thinking")).toBeInTheDocument();
+    expect(screen.getByText(/Step 1: parse intent\./)).toBeInTheDocument();
+    expect(screen.getByText(/Step 2: compute\./)).toBeInTheDocument();
+    expect(screen.getByText("The answer is 42.")).toBeInTheDocument();
+  });
+
+  it("collapses the reasoning section when toggled", () => {
+    const message: UIMessage = {
+      id: "a-reasoning-collapse",
+      role: "assistant",
+      content: "done",
+      createdAt: Date.now(),
+      reasoning: ["hidden after toggle"],
+    };
+
+    render(<MessageBubble message={message} />);
+
+    expect(screen.getByText("hidden after toggle")).toBeInTheDocument();
+    fireEvent.click(screen.getByRole("button", { name: /thinking/i }));
+    expect(screen.queryByText("hidden after toggle")).not.toBeInTheDocument();
+  });
+
   it("renders assistant image media as a larger generated result", () => {
     const message: UIMessage = {
       id: "a-image",
diff --git a/webui/src/tests/useNanobotStream.test.tsx b/webui/src/tests/useNanobotStream.test.tsx
index 60e6ada62..7fb94063c 100644
--- a/webui/src/tests/useNanobotStream.test.tsx
+++ b/webui/src/tests/useNanobotStream.test.tsx
@@ -113,6 +113,78 @@ describe("useNanobotStream", () => {
     expect(result.current.messages[1].kind).toBeUndefined();
   });
 
+  it("parks reasoning frames on a placeholder assistant message until the answer arrives", () => {
+    const fake = fakeClient();
+    const { result } = renderHook(() => useNanobotStream("chat-r", EMPTY_MESSAGES), {
+      wrapper: wrap(fake.client),
+    });
+
+    act(() => {
+      fake.emit("chat-r", {
+        event: "message",
+        chat_id: "chat-r",
+        text: "Let me think step by step.",
+        kind: "reasoning",
+      });
+      fake.emit("chat-r", {
+        event: "message",
+        chat_id: "chat-r",
+        text: "First, decompose the request.",
+        kind: "reasoning",
+      });
+    });
+
+    expect(result.current.messages).toHaveLength(1);
+    expect(result.current.messages[0].role).toBe("assistant");
+    expect(result.current.messages[0].reasoning).toEqual([
+      "Let me think step by step.",
+      "First, decompose the request.",
+    ]);
+  });
+
+  it("attaches reasoning to the latest assistant turn rather than spawning a new one", () => {
+    const fake = fakeClient();
+    const { result } = renderHook(() => useNanobotStream("chat-r2", EMPTY_MESSAGES), {
+      wrapper: wrap(fake.client),
+    });
+
+    act(() => {
+      fake.emit("chat-r2", {
+        event: "message",
+        chat_id: "chat-r2",
+        text: "The answer is 42.",
+      });
+      fake.emit("chat-r2", {
+        event: "message",
+        chat_id: "chat-r2",
+        text: "Reasoning surfaced post-hoc.",
+        kind: "reasoning",
+      });
+    });
+
+    expect(result.current.messages).toHaveLength(1);
+    expect(result.current.messages[0].content).toBe("The answer is 42.");
+    expect(result.current.messages[0].reasoning).toEqual(["Reasoning surfaced post-hoc."]);
+  });
+
+  it("ignores empty reasoning frames", () => {
+    const fake = fakeClient();
+    const { result } = renderHook(() => useNanobotStream("chat-r3", EMPTY_MESSAGES), {
+      wrapper: wrap(fake.client),
+    });
+
+    act(() => {
+      fake.emit("chat-r3", {
+        event: "message",
+        chat_id: "chat-r3",
+        text: "",
+        kind: "reasoning",
+      });
+    });
+
+    expect(result.current.messages).toHaveLength(0);
+  });
+
   it("attaches assistant media_urls to complete messages", () => {
     const fake = fakeClient();
     const { result } = renderHook(() => useNanobotStream("chat-m", EMPTY_MESSAGES), {

From 458b4ba235b40e00139386a2c767670b91384903 Mon Sep 17 00:00:00 2001
From: Xubin Ren <xubinrencs@gmail.com>
Date: Wed, 13 May 2026 07:13:43 +0000
Subject: [PATCH 06/17] feat(reasoning): stream reasoning content as a
 first-class channel
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reasoning now flows as its own stream — symmetric to the answer's
``delta`` / ``stream_end`` pair — instead of being shipped as one
oversized progress message. This lets WebUI render a live "Thinking…"
bubble that updates in place, then auto-collapses when the stream
closes. Other channels remain plugin no-ops by default.

## Protocol

New metadata: ``_reasoning_delta`` (chunk) and ``_reasoning_end``
(close marker). ChannelManager routes both to the dedicated plugin
hooks below; the legacy one-shot ``_reasoning`` is kept for back-compat
and BaseChannel expands it into a single delta + end pair so plugins
only ever implement the streaming primitives.

WebSocket emits two new events:

- ``reasoning_delta`` (event, chat_id, text, optional stream_id)
- ``reasoning_end`` (event, chat_id, optional stream_id)

## BaseChannel surface

- ``send_reasoning_delta(chat_id, delta, metadata)`` — no-op default
- ``send_reasoning_end(chat_id, metadata)`` — no-op default
- ``send_reasoning(msg)`` — back-compat wrapper, base impl forwards
  to the streaming primitives

A channel adds reasoning support by overriding the two streaming
primitives. Telegram / Slack / Discord / Feishu / WeChat / Matrix keep
the base no-ops until their bubble UIs are adapted; on those channels
reasoning is silently dropped at dispatch and never leaks out as a
stray text message.

## AgentHook

Adds ``emit_reasoning_end`` to the hook lifecycle. ``_LoopHook`` tracks
whether a reasoning segment is open and closes it when:

- the first answer delta arrives (so the UI locks the bubble before
  the answer renders below),
- ``on_stream_end`` fires,
- the runner emits one-shot ``reasoning_content`` / ``thinking_blocks``
  from a non-streaming response and immediately calls
  ``emit_reasoning_end``.

## WebUI

- ``UIMessage.reasoning`` is now a single accumulated string with a
  companion ``reasoningStreaming`` flag.
- ``useNanobotStream`` consumes ``reasoning_delta`` / ``reasoning_end``;
  legacy ``kind: "reasoning"`` is auto-translated to a delta + end.
- New ``ReasoningBubble``: shimmer header + auto-expanded while
  streaming, collapses to a clickable "Thinking" pill once closed,
  respects ``prefers-reduced-motion``.
- Answer deltas adopt the reasoning placeholder so the bubble and the
  answer share one assistant row.

## Tests

- ``tests/channels/test_channel_manager_reasoning.py`` — manager routes
  delta + end, drops on channel opt-out, expands one-shot back-compat.
- ``tests/channels/test_websocket_channel.py`` — new ``reasoning_delta``
  / ``reasoning_end`` frames, empty-chunk safety, no-subscriber safety,
  back-compat expansion.
- ``tests/agent/test_runner_reasoning.py`` — runner closes the segment
  on streaming answer start and after one-shot reasoning.
- WebUI ``useNanobotStream`` + ``message-bubble`` cover the new
  protocol and the shimmer styling.

## Docs

``docs/configuration.md`` and ``docs/websocket.md`` document the new
events and the plugin contract.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 docs/configuration.md                         |   2 +-
 docs/websocket.md                             |  23 +++
 nanobot/agent/hook.py                         |  11 ++
 nanobot/agent/loop.py                         |  36 +++-
 nanobot/agent/runner.py                       |  18 +-
 nanobot/channels/base.py                      |  45 ++++-
 nanobot/channels/manager.py                   |  28 ++-
 nanobot/channels/websocket.py                 |  60 ++++--
 tests/agent/test_runner_reasoning.py          |  42 ++++
 .../test_channel_manager_reasoning.py         | 139 +++++++++-----
 tests/channels/test_websocket_channel.py      |  63 ++++--
 webui/src/components/MessageBubble.tsx        |  57 ++++--
 webui/src/globals.css                         |  28 +++
 webui/src/hooks/useNanobotStream.ts           | 180 +++++++++++++-----
 webui/src/i18n/locales/en/common.json         |   1 +
 webui/src/i18n/locales/zh-CN/common.json      |   3 +-
 webui/src/lib/types.ts                        |  22 ++-
 webui/src/tests/message-bubble.test.tsx       |  42 ++--
 webui/src/tests/useNanobotStream.test.tsx     |  70 ++++---
 19 files changed, 649 insertions(+), 221 deletions(-)

diff --git a/docs/configuration.md b/docs/configuration.md
index ed5a534cf..0123017d2 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -743,7 +743,7 @@ Global settings that apply to all channels. Configure under the `channels` secti
 |---------|---------|-------------|
 | `sendProgress` | `true` | Stream agent's text progress to the channel |
 | `sendToolHints` | `false` | Stream tool-call hints (e.g. `read_file("…")`) |
-| `showReasoning` | `true` | Allow channels to surface model reasoning/thinking content (DeepSeek-R1 `reasoning_content`, Anthropic `thinking_blocks`, inline `<think>` tags). The setting is a plugin opt-in: even when `true`, a channel only renders reasoning if it overrides `send_reasoning()`. Currently surfaced on CLI and WebSocket/WebUI; other channels (Telegram, Slack, Discord, ...) keep it as a silent no-op until their bubble UI is adapted. Independent of `sendProgress`. |
+| `showReasoning` | `true` | Allow channels to surface model reasoning/thinking content (DeepSeek-R1 `reasoning_content`, Anthropic `thinking_blocks`, inline `<think>` tags). Reasoning flows as a dedicated stream with `_reasoning_delta` / `_reasoning_end` markers — channels override `send_reasoning_delta` / `send_reasoning_end` to render in-place updates. Even when set to `true`, channels without those overrides silently drop reasoning. Currently surfaced on CLI and WebSocket/WebUI (italic shimmer header, auto-collapses after the stream ends); Telegram / Slack / Discord / Feishu / WeChat / Matrix keep the base no-op until their bubble UI is adapted. Independent of `sendProgress`. |
 | `sendMaxRetries` | `3` | Max delivery attempts per outbound message, including the initial send (0-10 configured, minimum 1 actual attempt) |
 | `transcriptionProvider` | `"groq"` | Voice transcription backend: `"groq"` (free tier, default) or `"openai"`. API key is auto-resolved from the matching provider config. |
 | `transcriptionLanguage` | `null` | Optional ISO-639-1 language hint for audio transcription, e.g. `"en"`, `"ko"`, `"ja"`. |
diff --git a/docs/websocket.md b/docs/websocket.md
index 556bb5bb6..d6a816ac1 100644
--- a/docs/websocket.md
+++ b/docs/websocket.md
@@ -128,6 +128,29 @@ All frames are JSON text. Each message has an `event` field.
 }
 ```
 
+**`reasoning_delta`** — incremental model reasoning / thinking chunk for the active assistant turn. Mirrors `delta` but targets the reasoning bubble above the answer rather than the answer body:
+
+```json
+{
+  "event": "reasoning_delta",
+  "chat_id": "uuid-v4",
+  "text": "Let me decompose ",
+  "stream_id": "r1"
+}
+```
+
+**`reasoning_end`** — close marker for the active reasoning stream. WebUI uses this to lock the in-place bubble and switch from the shimmer header to a static collapsed state:
+
+```json
+{
+  "event": "reasoning_end",
+  "chat_id": "uuid-v4",
+  "stream_id": "r1"
+}
+```
+
+Reasoning frames only flow when the channel's `showReasoning` is `true` (default) and the model returns reasoning content (DeepSeek-R1 / Kimi / MiMo / OpenAI reasoning models, Anthropic extended thinking, or inline `<think>` / `<thought>` tags). Models without reasoning produce zero `reasoning_delta` frames.
+
 **`runtime_model_updated`** — broadcast when the gateway runtime model changes, for example after `/model <preset>`:
 
 ```json
diff --git a/nanobot/agent/hook.py b/nanobot/agent/hook.py
index 86775742d..5b6fed445 100644
--- a/nanobot/agent/hook.py
+++ b/nanobot/agent/hook.py
@@ -52,6 +52,14 @@ class AgentHook:
     async def emit_reasoning(self, reasoning_content: str | None) -> None:
         pass
 
+    async def emit_reasoning_end(self) -> None:
+        """Mark the end of an in-flight reasoning stream.
+
+        Hooks that buffer ``emit_reasoning`` chunks (for in-place UI updates)
+        flush and freeze the rendered group here. One-shot hooks ignore.
+        """
+        pass
+
     async def after_iteration(self, context: AgentHookContext) -> None:
         pass
 
@@ -102,6 +110,9 @@ class CompositeHook(AgentHook):
     async def emit_reasoning(self, reasoning_content: str | None) -> None:
         await self._for_each_hook_safe("emit_reasoning", reasoning_content)
 
+    async def emit_reasoning_end(self) -> None:
+        await self._for_each_hook_safe("emit_reasoning_end")
+
     async def after_iteration(self, context: AgentHookContext) -> None:
         await self._for_each_hook_safe("after_iteration", context)
 
diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index e7b045f01..7897f89dd 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -87,6 +87,7 @@ class _LoopHook(AgentHook):
         self._session_key = session_key
         self._stream_buf = ""
         self._think_extractor = IncrementalThinkExtractor()
+        self._reasoning_open = False
 
     def wants_streaming(self) -> bool:
         return self._on_stream is not None
@@ -102,10 +103,15 @@ class _LoopHook(AgentHook):
         if await self._think_extractor.feed(self._stream_buf, self.emit_reasoning):
             context.streamed_reasoning = True
 
-        if incremental and self._on_stream:
-            await self._on_stream(incremental)
+        if incremental:
+            # Answer text has started — close any open reasoning segment so
+            # the UI can lock the bubble before the answer renders below it.
+            await self.emit_reasoning_end()
+            if self._on_stream:
+                await self._on_stream(incremental)
 
     async def on_stream_end(self, context: AgentHookContext, *, resuming: bool) -> None:
+        await self.emit_reasoning_end()
         if self._on_stream_end:
             await self._on_stream_end(resuming=resuming)
         self._stream_buf = ""
@@ -147,16 +153,27 @@ class _LoopHook(AgentHook):
         )
 
     async def emit_reasoning(self, reasoning_content: str | None) -> None:
-        """Publish reasoning content; channel plugins decide whether to render.
+        """Publish a reasoning chunk; channel plugins decide whether to render.
 
-        The loop is intentionally not the gate: ``ChannelsConfig.show_reasoning``
-        is a default that ``ChannelManager`` and ``BaseChannel.send_reasoning``
-        consult per channel. A channel without a low-emphasis UI primitive
-        keeps the base no-op and the content drops at the dispatch boundary.
+        Each call is one delta in a streaming session. ``emit_reasoning_end``
+        closes the segment. The loop is intentionally not the gate:
+        ``ChannelsConfig.show_reasoning`` is a default that ``ChannelManager``
+        and ``BaseChannel.send_reasoning_delta`` consult per channel — a
+        channel without a low-emphasis UI primitive keeps the base no-op
+        and the content drops at the dispatch boundary.
         """
         if self._on_progress and reasoning_content:
+            self._reasoning_open = True
             await self._on_progress(reasoning_content, reasoning=True)
 
+    async def emit_reasoning_end(self) -> None:
+        """Close the current reasoning stream segment, if any was open."""
+        if self._reasoning_open and self._on_progress:
+            self._reasoning_open = False
+            await self._on_progress("", reasoning_end=True)
+        else:
+            self._reasoning_open = False
+
     async def after_iteration(self, context: AgentHookContext) -> None:
         if (
             self._on_progress
@@ -665,12 +682,15 @@ class AgentLoop:
             tool_hint: bool = False,
             tool_events: list[dict[str, Any]] | None = None,
             reasoning: bool = False,
+            reasoning_end: bool = False,
         ) -> None:
             meta = dict(msg.metadata or {})
             meta["_progress"] = True
             meta["_tool_hint"] = tool_hint
             if reasoning:
-                meta["_reasoning"] = True
+                meta["_reasoning_delta"] = True
+            if reasoning_end:
+                meta["_reasoning_end"] = True
             if tool_events:
                 meta["_tool_events"] = tool_events
             await self.bus.publish_outbound(
diff --git a/nanobot/agent/runner.py b/nanobot/agent/runner.py
index 6b8e5383c..37da63872 100644
--- a/nanobot/agent/runner.py
+++ b/nanobot/agent/runner.py
@@ -291,6 +291,7 @@ class AgentRunner:
             response.content = cleaned_content
             if reasoning_text and not context.streamed_reasoning:
                 await hook.emit_reasoning(reasoning_text)
+                await hook.emit_reasoning_end()
                 context.streamed_reasoning = True
 
             if response.should_execute_tools:
@@ -617,6 +618,8 @@ class AgentRunner:
             and getattr(self.provider, "supports_progress_deltas", False) is True
         )
 
+        progress_state: dict[str, bool] | None = None
+
         if wants_streaming:
             async def _stream(delta: str) -> None:
                 if delta:
@@ -630,6 +633,7 @@ class AgentRunner:
         elif wants_progress_streaming:
             stream_buf = ""
             think_extractor = IncrementalThinkExtractor()
+            progress_state = {"reasoning_open": False}
 
             async def _stream_progress(delta: str) -> None:
                 nonlocal stream_buf
@@ -642,8 +646,12 @@ class AgentRunner:
 
                 if await think_extractor.feed(stream_buf, hook.emit_reasoning):
                     context.streamed_reasoning = True
+                    progress_state["reasoning_open"] = True
 
                 if incremental:
+                    if progress_state["reasoning_open"]:
+                        await hook.emit_reasoning_end()
+                        progress_state["reasoning_open"] = False
                     context.streamed_content = True
                     await spec.progress_callback(incremental)
 
@@ -654,16 +662,20 @@ class AgentRunner:
         else:
             coro = self.provider.chat_with_retry(**kwargs)
 
-        if timeout_s is None:
-            return await coro
         try:
-            return await asyncio.wait_for(coro, timeout=timeout_s)
+            response = (
+                await coro if timeout_s is None
+                else await asyncio.wait_for(coro, timeout=timeout_s)
+            )
         except asyncio.TimeoutError:
             return LLMResponse(
                 content=f"Error calling LLM: timed out after {timeout_s:g}s",
                 finish_reason="error",
                 error_kind="timeout",
             )
+        if progress_state and progress_state.get("reasoning_open"):
+            await hook.emit_reasoning_end()
+        return response
 
     async def _request_finalization_retry(
         self,
diff --git a/nanobot/channels/base.py b/nanobot/channels/base.py
index c82003d88..257127d5a 100644
--- a/nanobot/channels/base.py
+++ b/nanobot/channels/base.py
@@ -121,18 +121,53 @@ class BaseChannel(ABC):
         """
         pass
 
-    async def send_reasoning(self, msg: OutboundMessage) -> None:
-        """Surface model reasoning/thinking content.
+    async def send_reasoning_delta(
+        self, chat_id: str, delta: str, metadata: dict[str, Any] | None = None
+    ) -> None:
+        """Stream a chunk of model reasoning/thinking content.
 
         Default is no-op. Channels with a native low-emphasis primitive
         (Slack context block, Telegram expandable blockquote, Discord
         subtext, WebUI italic bubble, ...) override to render reasoning
-        as a subordinate trace. Channels without a suitable affordance
-        keep this no-op: silently dropping is better than leaking raw
-        model thoughts as regular conversational messages.
+        as a subordinate trace that updates in place as the model thinks.
+
+        Streaming contract mirrors :meth:`send_delta`: ``_reasoning_delta``
+        is a chunk, ``_reasoning_end`` ends the current reasoning segment,
+        and stateful implementations should key buffers by ``_stream_id``
+        rather than only by ``chat_id``.
         """
         return
 
+    async def send_reasoning_end(
+        self, chat_id: str, metadata: dict[str, Any] | None = None
+    ) -> None:
+        """Mark the end of a reasoning stream segment.
+
+        Default is no-op. Channels that buffer ``send_reasoning_delta``
+        chunks for in-place updates use this signal to flush and freeze
+        the rendered group; one-shot channels can ignore it entirely.
+        """
+        return
+
+    async def send_reasoning(self, msg: OutboundMessage) -> None:
+        """Deliver a complete reasoning block.
+
+        Default implementation reuses the streaming pair so plugins only
+        need to override the delta/end methods. Equivalent to one delta
+        with the full content followed immediately by an end marker —
+        keeps a single rendering path for both streamed and one-shot
+        reasoning (e.g. DeepSeek-R1's final-response ``reasoning_content``).
+        """
+        if not msg.content:
+            return
+        meta = dict(msg.metadata or {})
+        meta.setdefault("_reasoning_delta", True)
+        await self.send_reasoning_delta(msg.chat_id, msg.content, meta)
+        end_meta = dict(meta)
+        end_meta.pop("_reasoning_delta", None)
+        end_meta["_reasoning_end"] = True
+        await self.send_reasoning_end(msg.chat_id, end_meta)
+
     @property
     def supports_streaming(self) -> bool:
         """True when config enables streaming AND this subclass implements send_delta."""
diff --git a/nanobot/channels/manager.py b/nanobot/channels/manager.py
index abf9bf043..3a6b6e50f 100644
--- a/nanobot/channels/manager.py
+++ b/nanobot/channels/manager.py
@@ -283,13 +283,18 @@ class ChannelManager:
                         timeout=1.0
                     )
 
-                if msg.metadata.get("_reasoning"):
-                    # Reasoning rides its own plugin channel: only delivered when
-                    # the destination channel both opts in (``show_reasoning``)
-                    # and overrides ``send_reasoning``. Channels without a
-                    # low-emphasis UI primitive keep the base no-op and the
-                    # content silently drops here rather than leak as a
-                    # conversational reply.
+                if (
+                    msg.metadata.get("_reasoning_delta")
+                    or msg.metadata.get("_reasoning_end")
+                    or msg.metadata.get("_reasoning")
+                ):
+                    # Reasoning rides its own plugin channel: only delivered
+                    # when the destination channel opts in via ``show_reasoning``
+                    # and overrides the streaming primitives. Channels without
+                    # a low-emphasis UI affordance keep the base no-op and the
+                    # content silently drops here. ``_reasoning`` (one-shot)
+                    # is accepted for backward compatibility with hooks that
+                    # haven't migrated to delta/end yet.
                     channel = self.channels.get(msg.channel)
                     if channel is not None and channel.show_reasoning:
                         await self._send_with_retry(channel, msg)
@@ -345,7 +350,14 @@ class ChannelManager:
     @staticmethod
     async def _send_once(channel: BaseChannel, msg: OutboundMessage) -> None:
         """Send one outbound message without retry policy."""
-        if msg.metadata.get("_reasoning"):
+        if msg.metadata.get("_reasoning_end"):
+            await channel.send_reasoning_end(msg.chat_id, msg.metadata)
+        elif msg.metadata.get("_reasoning_delta"):
+            await channel.send_reasoning_delta(msg.chat_id, msg.content, msg.metadata)
+        elif msg.metadata.get("_reasoning"):
+            # Back-compat: one-shot reasoning. BaseChannel translates this
+            # to a single delta + end pair so plugins only implement the
+            # streaming primitives.
             await channel.send_reasoning(msg)
         elif msg.metadata.get("_stream_delta") or msg.metadata.get("_stream_end"):
             await channel.send_delta(msg.chat_id, msg.content, msg.metadata)
diff --git a/nanobot/channels/websocket.py b/nanobot/channels/websocket.py
index bba68397f..a77c8594f 100644
--- a/nanobot/channels/websocket.py
+++ b/nanobot/channels/websocket.py
@@ -1487,30 +1487,54 @@ class WebSocketChannel(BaseChannel):
         for connection in conns:
             await self._safe_send_to(connection, raw, label=" ")
 
-    async def send_reasoning(self, msg: OutboundMessage) -> None:
-        """Stream model reasoning as a subordinate trace frame.
-
-        Renders as ``kind=reasoning`` alongside the existing ``tool_hint`` /
-        ``progress`` frames; the WebUI mounts these on the active assistant
-        bubble rather than as a conversational reply.
+    async def send_reasoning_delta(
+        self,
+        chat_id: str,
+        delta: str,
+        metadata: dict[str, Any] | None = None,
+    ) -> None:
+        """Push one chunk of model reasoning. Mirrors ``send_delta`` shape so
+        WebUI receives a stream that opens, updates in place, and closes —
+        rendered above the active assistant bubble with a shimmer header
+        until the matching ``reasoning_end`` arrives.
         """
-        conns = list(self._subs.get(msg.chat_id, ()))
-        if not conns:
+        conns = list(self._subs.get(chat_id, ()))
+        if not conns or not delta:
             return
-        if not msg.content:
-            return
-        payload: dict[str, Any] = {
-            "event": "message",
-            "chat_id": msg.chat_id,
-            "text": msg.content,
-            "kind": "reasoning",
+        meta = metadata or {}
+        body: dict[str, Any] = {
+            "event": "reasoning_delta",
+            "chat_id": chat_id,
+            "text": delta,
         }
-        if msg.reply_to:
-            payload["reply_to"] = msg.reply_to
-        raw = json.dumps(payload, ensure_ascii=False)
+        stream_id = meta.get("_stream_id")
+        if stream_id is not None:
+            body["stream_id"] = stream_id
+        raw = json.dumps(body, ensure_ascii=False)
         for connection in conns:
             await self._safe_send_to(connection, raw, label=" reasoning ")
 
+    async def send_reasoning_end(
+        self,
+        chat_id: str,
+        metadata: dict[str, Any] | None = None,
+    ) -> None:
+        """Close the current reasoning stream segment for in-place renderers."""
+        conns = list(self._subs.get(chat_id, ()))
+        if not conns:
+            return
+        meta = metadata or {}
+        body: dict[str, Any] = {
+            "event": "reasoning_end",
+            "chat_id": chat_id,
+        }
+        stream_id = meta.get("_stream_id")
+        if stream_id is not None:
+            body["stream_id"] = stream_id
+        raw = json.dumps(body, ensure_ascii=False)
+        for connection in conns:
+            await self._safe_send_to(connection, raw, label=" reasoning_end ")
+
     async def send_delta(
         self,
         chat_id: str,
diff --git a/tests/agent/test_runner_reasoning.py b/tests/agent/test_runner_reasoning.py
index 512f3d2e9..d971e05a1 100644
--- a/tests/agent/test_runner_reasoning.py
+++ b/tests/agent/test_runner_reasoning.py
@@ -24,11 +24,15 @@ class _RecordingHook(AgentHook):
     def __init__(self) -> None:
         super().__init__()
         self.emitted: list[str] = []
+        self.end_calls = 0
 
     async def emit_reasoning(self, reasoning_content: str | None) -> None:
         if reasoning_content:
             self.emitted.append(reasoning_content)
 
+    async def emit_reasoning_end(self) -> None:
+        self.end_calls += 1
+
 
 @pytest.mark.asyncio
 async def test_runner_preserves_reasoning_fields_in_assistant_history():
@@ -277,3 +281,41 @@ async def test_runner_does_not_double_emit_when_inline_think_already_streamed():
 
     assert result.final_content == "The answer."
     assert hook.emitted == ["working..."]
+    assert hook.end_calls >= 1, "reasoning stream must be closed once the answer starts"
+
+
+@pytest.mark.asyncio
+async def test_runner_closes_reasoning_stream_after_one_shot_response():
+    """A non-streaming response carrying ``reasoning_content`` must emit
+    both a reasoning delta and an end marker so channels can finalize the
+    in-place bubble."""
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock()
+
+    async def chat_with_retry(**kwargs):
+        return LLMResponse(
+            content="answer",
+            reasoning_content="hidden thought",
+            tool_calls=[],
+            usage={"prompt_tokens": 5, "completion_tokens": 3},
+        )
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    hook = _RecordingHook()
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "q"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=3,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        hook=hook,
+    ))
+
+    assert result.final_content == "answer"
+    assert hook.emitted == ["hidden thought"]
+    assert hook.end_calls == 1
diff --git a/tests/channels/test_channel_manager_reasoning.py b/tests/channels/test_channel_manager_reasoning.py
index 2200f4be2..bc2a640c6 100644
--- a/tests/channels/test_channel_manager_reasoning.py
+++ b/tests/channels/test_channel_manager_reasoning.py
@@ -1,14 +1,22 @@
 """Tests for ChannelManager routing of model reasoning content.
 
-Reasoning is delivered as a separate plugin action (``send_reasoning``)
-rather than a metadata flag on a regular outbound. The manager routes
-``_reasoning`` messages only to channels that opt in via
-``channel.show_reasoning``; channels without a low-emphasis UI primitive
-keep the base no-op and the content silently drops at dispatch.
+Reasoning is delivered through plugin streaming primitives
+(``send_reasoning_delta`` / ``send_reasoning_end``) so each channel
+controls in-place rendering — mirroring the existing answer ``send_delta``
+/ ``stream_end`` pair. The manager forwards reasoning frames only to
+channels that opt in via ``channel.show_reasoning``; plugins without a
+low-emphasis UI primitive keep the base no-op and the content silently
+drops at dispatch.
+
+One-shot ``_reasoning`` frames are accepted for back-compat with hooks
+that haven't migrated yet — ``BaseChannel.send_reasoning`` expands them
+to a single delta + end pair so plugins only implement the streaming
+primitives.
 """
 
 from __future__ import annotations
 
+import asyncio
 from unittest.mock import AsyncMock
 
 import pytest
@@ -27,7 +35,8 @@ class _MockChannel(BaseChannel):
     def __init__(self, config, bus):
         super().__init__(config, bus)
         self._send_mock = AsyncMock()
-        self._send_reasoning_mock = AsyncMock()
+        self._delta_mock = AsyncMock()
+        self._end_mock = AsyncMock()
 
     async def start(self):  # pragma: no cover - not exercised
         pass
@@ -38,8 +47,11 @@ class _MockChannel(BaseChannel):
     async def send(self, msg):
         return await self._send_mock(msg)
 
-    async def send_reasoning(self, msg):
-        return await self._send_reasoning_mock(msg)
+    async def send_reasoning_delta(self, chat_id, delta, metadata=None):
+        return await self._delta_mock(chat_id, delta, metadata)
+
+    async def send_reasoning_end(self, chat_id, metadata=None):
+        return await self._end_mock(chat_id, metadata)
 
 
 @pytest.fixture
@@ -50,17 +62,52 @@ def manager() -> ChannelManager:
 
 
 @pytest.mark.asyncio
-async def test_reasoning_routes_to_send_reasoning_not_send(manager):
+async def test_reasoning_delta_routes_to_send_reasoning_delta(manager):
     channel = manager.channels["mock"]
     msg = OutboundMessage(
         channel="mock",
         chat_id="c1",
-        content="step-by-step thinking",
+        content="step-by-step",
+        metadata={"_progress": True, "_reasoning_delta": True, "_stream_id": "r1"},
+    )
+    await manager._send_once(channel, msg)
+    channel._delta_mock.assert_awaited_once()
+    args = channel._delta_mock.await_args.args
+    assert args[0] == "c1"
+    assert args[1] == "step-by-step"
+    channel._send_mock.assert_not_awaited()
+    channel._end_mock.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_reasoning_end_routes_to_send_reasoning_end(manager):
+    channel = manager.channels["mock"]
+    msg = OutboundMessage(
+        channel="mock",
+        chat_id="c1",
+        content="",
+        metadata={"_progress": True, "_reasoning_end": True, "_stream_id": "r1"},
+    )
+    await manager._send_once(channel, msg)
+    channel._end_mock.assert_awaited_once()
+    channel._delta_mock.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_legacy_one_shot_reasoning_expands_to_delta_plus_end(manager):
+    """`_reasoning` (no delta/end pair) falls back through `send_reasoning`
+    which the base class expands to a single delta + end. Hooks that haven't
+    migrated still surface in WebUI as a complete stream segment."""
+    channel = manager.channels["mock"]
+    msg = OutboundMessage(
+        channel="mock",
+        chat_id="c1",
+        content="one-shot reasoning",
         metadata={"_progress": True, "_reasoning": True},
     )
     await manager._send_once(channel, msg)
-    channel._send_reasoning_mock.assert_awaited_once_with(msg)
-    channel._send_mock.assert_not_awaited()
+    channel._delta_mock.assert_awaited_once()
+    channel._end_mock.assert_awaited_once()
 
 
 @pytest.mark.asyncio
@@ -71,14 +118,14 @@ async def test_dispatch_drops_reasoning_when_channel_opts_out(manager):
         channel="mock",
         chat_id="c1",
         content="hidden thinking",
-        metadata={"_progress": True, "_reasoning": True},
+        metadata={"_progress": True, "_reasoning_delta": True},
     )
     await manager.bus.publish_outbound(msg)
 
-    pumped = await _pump_one(manager)
+    await _pump_one(manager)
 
-    assert pumped is True
-    channel._send_reasoning_mock.assert_not_awaited()
+    channel._delta_mock.assert_not_awaited()
+    channel._end_mock.assert_not_awaited()
     channel._send_mock.assert_not_awaited()
 
 
@@ -86,20 +133,24 @@ async def test_dispatch_drops_reasoning_when_channel_opts_out(manager):
 async def test_dispatch_delivers_reasoning_when_channel_opts_in(manager):
     channel = manager.channels["mock"]
     channel.show_reasoning = True
-    msg = OutboundMessage(
+    for chunk in ("first ", "second"):
+        await manager.bus.publish_outbound(OutboundMessage(
+            channel="mock",
+            chat_id="c1",
+            content=chunk,
+            metadata={"_progress": True, "_reasoning_delta": True, "_stream_id": "r1"},
+        ))
+    await manager.bus.publish_outbound(OutboundMessage(
         channel="mock",
         chat_id="c1",
-        content="visible thinking",
-        metadata={"_progress": True, "_reasoning": True},
-    )
-    await manager.bus.publish_outbound(msg)
+        content="",
+        metadata={"_progress": True, "_reasoning_end": True, "_stream_id": "r1"},
+    ))
 
-    pumped = await _pump_one(manager)
+    await _pump_one(manager)
 
-    assert pumped is True
-    channel._send_reasoning_mock.assert_awaited_once()
-    delivered = channel._send_reasoning_mock.await_args.args[0]
-    assert delivered.content == "visible thinking"
+    assert channel._delta_mock.await_count == 2
+    channel._end_mock.assert_awaited_once()
 
 
 @pytest.mark.asyncio
@@ -108,21 +159,19 @@ async def test_dispatch_silently_drops_reasoning_for_unknown_channel(manager):
         channel="ghost",
         chat_id="c1",
         content="nobody home",
-        metadata={"_progress": True, "_reasoning": True},
+        metadata={"_progress": True, "_reasoning_delta": True},
     )
     await manager.bus.publish_outbound(msg)
 
-    pumped = await _pump_one(manager)
+    await _pump_one(manager)
 
-    assert pumped is True
-    # Mock channel must not receive anything destined for a different channel.
-    manager.channels["mock"]._send_reasoning_mock.assert_not_awaited()
+    manager.channels["mock"]._delta_mock.assert_not_awaited()
     manager.channels["mock"]._send_mock.assert_not_awaited()
 
 
 @pytest.mark.asyncio
-async def test_base_channel_send_reasoning_is_noop_safe():
-    """Plugins that don't override `send_reasoning` must not blow up."""
+async def test_base_channel_reasoning_primitives_are_noop_safe():
+    """Plugins that don't override the streaming primitives must not blow up."""
 
     class _Plain(BaseChannel):
         name = "plain"
@@ -138,7 +187,9 @@ async def test_base_channel_send_reasoning_is_noop_safe():
             pass
 
     channel = _Plain({}, MessageBus())
-    # No exception, returns None.
+    assert await channel.send_reasoning_delta("c", "x") is None
+    assert await channel.send_reasoning_end("c") is None
+    # And the one-shot wrapper translates without raising.
     assert await channel.send_reasoning(
         OutboundMessage(channel="plain", chat_id="c", content="x", metadata={})
     ) is None
@@ -151,26 +202,21 @@ async def test_reasoning_routing_does_not_consult_send_progress(manager):
     channel = manager.channels["mock"]
     channel.send_progress = False
     channel.show_reasoning = True
-    msg = OutboundMessage(
+    await manager.bus.publish_outbound(OutboundMessage(
         channel="mock",
         chat_id="c1",
         content="still surfaces",
-        metadata={"_progress": True, "_reasoning": True},
-    )
-    await manager.bus.publish_outbound(msg)
+        metadata={"_progress": True, "_reasoning_delta": True},
+    ))
 
-    pumped = await _pump_one(manager)
+    await _pump_one(manager)
 
-    assert pumped is True
-    channel._send_reasoning_mock.assert_awaited_once()
+    channel._delta_mock.assert_awaited_once()
 
 
-async def _pump_one(manager: ChannelManager) -> bool:
-    """Drive the dispatcher for exactly one message, then cancel."""
-    import asyncio
-
+async def _pump_one(manager: ChannelManager) -> None:
+    """Drive the dispatcher until the outbound queue drains, then cancel."""
     task = asyncio.create_task(manager._dispatch_outbound())
-    # Yield control until the queue drains.
     for _ in range(50):
         await asyncio.sleep(0.01)
         if manager.bus.outbound.qsize() == 0:
@@ -180,4 +226,3 @@ async def _pump_one(manager: ChannelManager) -> bool:
         await task
     except asyncio.CancelledError:
         pass
-    return True
diff --git a/tests/channels/test_websocket_channel.py b/tests/channels/test_websocket_channel.py
index 0e682ed0a..f11cb21b4 100644
--- a/tests/channels/test_websocket_channel.py
+++ b/tests/channels/test_websocket_channel.py
@@ -359,30 +359,44 @@ async def test_send_delta_emits_delta_and_stream_end() -> None:
 
 
 @pytest.mark.asyncio
-async def test_send_reasoning_emits_reasoning_kind_frame() -> None:
+async def test_send_reasoning_delta_emits_streaming_frame() -> None:
     bus = MagicMock()
     channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus)
     mock_ws = AsyncMock()
     channel._attach(mock_ws, "chat-1")
 
-    await channel.send_reasoning(OutboundMessage(
-        channel="websocket",
-        chat_id="chat-1",
-        content="step-by-step thinking",
-        metadata={"_progress": True, "_reasoning": True},
-    ))
+    await channel.send_reasoning_delta(
+        "chat-1",
+        "step-by-step thinking",
+        {"_reasoning_delta": True, "_stream_id": "r1"},
+    )
 
     mock_ws.send.assert_awaited_once()
     payload = json.loads(mock_ws.send.await_args.args[0])
-    assert payload["event"] == "message"
+    assert payload["event"] == "reasoning_delta"
     assert payload["chat_id"] == "chat-1"
     assert payload["text"] == "step-by-step thinking"
-    assert payload["kind"] == "reasoning"
+    assert payload["stream_id"] == "r1"
 
 
 @pytest.mark.asyncio
-async def test_send_reasoning_drops_empty_content() -> None:
-    """Empty reasoning emits nothing — keeps the frontend bubble clean."""
+async def test_send_reasoning_end_emits_close_frame() -> None:
+    bus = MagicMock()
+    channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus)
+    mock_ws = AsyncMock()
+    channel._attach(mock_ws, "chat-1")
+
+    await channel.send_reasoning_end("chat-1", {"_reasoning_end": True, "_stream_id": "r1"})
+
+    payload = json.loads(mock_ws.send.await_args.args[0])
+    assert payload == {"event": "reasoning_end", "chat_id": "chat-1", "stream_id": "r1"}
+
+
+@pytest.mark.asyncio
+async def test_send_reasoning_one_shot_expands_to_delta_plus_end() -> None:
+    """``send_reasoning`` is back-compat for hooks that haven't migrated:
+    the base implementation must produce one delta and one end so the
+    WebUI sees the same shape either way."""
     bus = MagicMock()
     channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus)
     mock_ws = AsyncMock()
@@ -391,10 +405,27 @@ async def test_send_reasoning_drops_empty_content() -> None:
     await channel.send_reasoning(OutboundMessage(
         channel="websocket",
         chat_id="chat-1",
-        content="",
+        content="thinking",
         metadata={"_reasoning": True},
     ))
 
+    assert mock_ws.send.await_count == 2
+    first = json.loads(mock_ws.send.call_args_list[0][0][0])
+    second = json.loads(mock_ws.send.call_args_list[1][0][0])
+    assert first["event"] == "reasoning_delta"
+    assert first["text"] == "thinking"
+    assert second["event"] == "reasoning_end"
+
+
+@pytest.mark.asyncio
+async def test_send_reasoning_delta_drops_empty_chunks() -> None:
+    bus = MagicMock()
+    channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus)
+    mock_ws = AsyncMock()
+    channel._attach(mock_ws, "chat-1")
+
+    await channel.send_reasoning_delta("chat-1", "", {"_reasoning_delta": True})
+
     mock_ws.send.assert_not_awaited()
 
 
@@ -403,12 +434,8 @@ async def test_send_reasoning_without_subscribers_is_noop() -> None:
     bus = MagicMock()
     channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus)
 
-    await channel.send_reasoning(OutboundMessage(
-        channel="websocket",
-        chat_id="unattached",
-        content="thinking",
-        metadata={"_reasoning": True},
-    ))
+    await channel.send_reasoning_delta("unattached", "thinking", None)
+    await channel.send_reasoning_end("unattached", None)
     # No subscribers, no exception, no send.
 
 
diff --git a/webui/src/components/MessageBubble.tsx b/webui/src/components/MessageBubble.tsx
index 556460824..9002ad500 100644
--- a/webui/src/components/MessageBubble.tsx
+++ b/webui/src/components/MessageBubble.tsx
@@ -1,4 +1,4 @@
-import { useCallback, useEffect, useMemo, useRef, useState } from "react";
+import { useCallback, useEffect, useRef, useState } from "react";
 import { Check, ChevronRight, Copy, FileIcon, ImageIcon, PlaySquare, Sparkles, Wrench } from "lucide-react";
 import { useTranslation } from "react-i18next";
 
@@ -85,12 +85,16 @@ export function MessageBubble({ message }: MessageBubbleProps) {
 
   const empty = message.content.trim().length === 0;
   const media = message.media ?? [];
-  const reasoning = message.role === "assistant" ? message.reasoning ?? [] : [];
+  const reasoning = message.role === "assistant" ? message.reasoning ?? "" : "";
+  const reasoningStreaming = !!(message.role === "assistant" && message.reasoningStreaming);
+  const hasReasoning = reasoning.length > 0 || reasoningStreaming;
   const showAssistantActions = message.role === "assistant" && !message.isStreaming && !empty;
   return (
     <div className={cn("w-full text-[15px]", baseAnim)} style={{ lineHeight: "var(--cjk-line-height)" }}>
-      {reasoning.length > 0 ? <ReasoningBubble lines={reasoning} /> : null}
-      {empty && message.isStreaming && reasoning.length === 0 ? (
+      {hasReasoning ? (
+        <ReasoningBubble text={reasoning} streaming={reasoningStreaming} />
+      ) : null}
+      {empty && message.isStreaming && !hasReasoning ? (
         <TypingDots />
       ) : empty && message.isStreaming ? null : (
         <>
@@ -437,33 +441,52 @@ function TraceGroup({ message, animClass }: TraceGroupProps) {
 }
 
 interface ReasoningBubbleProps {
-  lines: string[];
+  text: string;
+  streaming: boolean;
 }
 
 /**
- * Subordinate "thinking" trace shown above an assistant turn. Mirrors the
- * CLI's italic dim ``ChevronRight`` row visually; collapsible because
- * reasoning from models like DeepSeek-R1 / o-series can run long. Defaults
- * to expanded while the answer is still streaming (so the user sees the
- * model "thinking out loud"), but the toggle persists across rerenders.
+ * Subordinate "thinking" trace shown above an assistant turn.
+ *
+ * Lifecycle:
+ *   - While ``streaming`` is true (``reasoning_delta`` frames still arriving),
+ *     the bubble defaults to open and the header runs a shimmer + pulse so
+ *     the user sees the model "thinking out loud" in real time.
+ *   - On ``reasoning_end`` the bubble auto-collapses for prose density —
+ *     the user can re-expand to inspect the chain of thought. The local
+ *     toggle persists once the user interacts.
  */
-function ReasoningBubble({ lines }: ReasoningBubbleProps) {
+function ReasoningBubble({ text, streaming }: ReasoningBubbleProps) {
   const { t } = useTranslation();
-  const [open, setOpen] = useState(true);
-  const text = useMemo(() => lines.join("\n\n"), [lines]);
+  const [userToggled, setUserToggled] = useState(false);
+  const [openLocal, setOpenLocal] = useState(true);
+  const open = userToggled ? openLocal : streaming;
+  const onToggle = () => {
+    setUserToggled(true);
+    setOpenLocal((v) => (userToggled ? !v : !open));
+  };
   return (
     <div className="mb-2 w-full animate-in fade-in-0 slide-in-from-top-1 duration-200">
       <button
         type="button"
-        onClick={() => setOpen((v) => !v)}
+        onClick={onToggle}
         className={cn(
           "flex w-full items-center gap-2 rounded-md px-2 py-1.5",
           "text-xs text-muted-foreground transition-colors hover:bg-muted/45",
+          streaming && "reasoning-shimmer",
         )}
         aria-expanded={open}
+        aria-live={streaming ? "polite" : undefined}
       >
-        <Sparkles className="h-3.5 w-3.5" aria-hidden />
-        <span className="font-medium">{t("message.reasoning", { defaultValue: "Thinking" })}</span>
+        <Sparkles
+          className={cn("h-3.5 w-3.5", streaming && "animate-pulse")}
+          aria-hidden
+        />
+        <span className="font-medium">
+          {streaming
+            ? t("message.reasoningStreaming", { defaultValue: "Thinking…" })
+            : t("message.reasoning", { defaultValue: "Thinking" })}
+        </span>
         <ChevronRight
           aria-hidden
           className={cn(
@@ -472,7 +495,7 @@ function ReasoningBubble({ lines }: ReasoningBubbleProps) {
           )}
         />
       </button>
-      {open && (
+      {open && text.length > 0 && (
         <div
           className={cn(
             "mt-1 whitespace-pre-wrap break-words border-l border-muted-foreground/20 pl-3",
diff --git a/webui/src/globals.css b/webui/src/globals.css
index 802009ee7..a365e33b6 100644
--- a/webui/src/globals.css
+++ b/webui/src/globals.css
@@ -117,6 +117,34 @@
     --cjk-line-height: 1.625;
   }
 
+  /* Header highlight for the reasoning bubble while ``reasoning_delta``
+     frames stream in: a translucent band sweeps across it on a loop.
+     CSS only; the loop is switched off under ``prefers-reduced-motion``. */
+  @keyframes reasoning-shimmer-sweep {
+    from {
+      background-position: -200% 0;
+    }
+    to {
+      background-position: 200% 0;
+    }
+  }
+  .reasoning-shimmer {
+    background-repeat: no-repeat;
+    background-size: 200% 100%;
+    background-image: linear-gradient(
+      to right,
+      transparent 0%,
+      hsl(var(--muted-foreground) / 0.18) 50%,
+      transparent 100%
+    );
+    animation: reasoning-shimmer-sweep 2.2s linear infinite;
+  }
+  @media (prefers-reduced-motion: reduce) {
+    .reasoning-shimmer {
+      animation: none;
+    }
+  }
+
   /* Subtle scrollbar that doesn't fight the dark background. */
   .scrollbar-thin {
     scrollbar-width: thin;
diff --git a/webui/src/hooks/useNanobotStream.ts b/webui/src/hooks/useNanobotStream.ts
index ee460cf56..60736b393 100644
--- a/webui/src/hooks/useNanobotStream.ts
+++ b/webui/src/hooks/useNanobotStream.ts
@@ -18,6 +18,82 @@ interface StreamBuffer {
   parts: string[];
 }
 
+/**
+ * Fold one reasoning chunk into the message list without mutating ``prev``.
+ *
+ * Walking backwards, non-assistant and trace rows are skipped and only the
+ * newest remaining assistant row is examined. It absorbs the chunk when its
+ * reasoning is still streaming, or when it has no answer text yet and either
+ * already holds reasoning or is itself still streaming. Otherwise a fresh
+ * streaming placeholder is appended so reasoning never lands on a finished answer.
+ */
+function attachReasoningChunk(prev: UIMessage[], chunk: string): UIMessage[] {
+  // Index of the newest assistant row that is not a trace breadcrumb.
+  // NOTE(review): non-assistant rows (including user messages) are skipped,
+  // so an unclosed stream from an earlier turn can be resumed — confirm intended.
+  let at = prev.length - 1;
+  while (at >= 0 && (prev[at].role !== "assistant" || prev[at].kind === "trace")) {
+    at -= 1;
+  }
+  if (at >= 0) {
+    const row = prev[at];
+    const answerEmpty = row.content.length === 0;
+    const resumable =
+      row.reasoningStreaming ||
+      (answerEmpty && (row.reasoning !== undefined || row.isStreaming));
+    if (resumable) {
+      // A row with no reasoning yet starts from the empty string.
+      const updated: UIMessage = {
+        ...row,
+        reasoning: (row.reasoning ?? "") + chunk,
+        reasoningStreaming: true,
+      };
+      return [...prev.slice(0, at), updated, ...prev.slice(at + 1)];
+    }
+  }
+  const placeholder: UIMessage = {
+    id: crypto.randomUUID(),
+    role: "assistant",
+    content: "",
+    isStreaming: true,
+    reasoning: chunk,
+    reasoningStreaming: true,
+    createdAt: Date.now(),
+  };
+  return [...prev, placeholder];
+}
+
+/**
+ * Return the id of the trailing row when an incoming answer delta may
+ * write into it, or ``null`` when the delta needs a row of its own.
+ *
+ * Only the final entry of ``prev`` is considered. It qualifies when it is
+ * a non-trace assistant row with empty ``content`` that is still flagged
+ * ``isStreaming`` — the shape of the placeholder that
+ * ``attachReasoningChunk`` appends.
+ */
+function findActiveAssistantPlaceholder(prev: UIMessage[]): string | null {
+  const tail = prev[prev.length - 1];
+  if (!tail) return null;
+  const isAnswerRow = tail.role === "assistant" && tail.kind !== "trace";
+  const isEmptyAndLive = tail.content.length === 0 && Boolean(tail.isStreaming);
+  return isAnswerRow && isEmptyAndLive ? tail.id : null;
+}
+
+/**
+ * Clear ``reasoningStreaming`` on the newest row that still has it set.
+ * Returns ``prev`` itself when no row is open, so a stray ``reasoning_end``
+ * changes nothing; otherwise returns a copy with only that row replaced.
+ */
+function closeReasoningStream(prev: UIMessage[]): UIMessage[] {
+  let open = prev.length - 1;
+  while (open >= 0 && !prev[open].reasoningStreaming) open -= 1;
+  if (open < 0) return prev;
+  const copy = prev.slice();
+  copy[open] = { ...prev[open], reasoningStreaming: false };
+  return copy;
+}
+
 /**
  * Subscribe to a chat by ID. Returns the in-memory message list for the chat,
  * a streaming flag, and a ``send`` function. Initial history must be seeded
@@ -122,27 +198,42 @@ export function useNanobotStream(
 
       if (ev.event === "delta") {
         if (suppressStreamUntilTurnEndRef.current) return;
-        const id = buffer.current?.messageId ?? crypto.randomUUID();
-        if (!buffer.current) {
-          buffer.current = { messageId: id, parts: [] };
-          setMessages((prev) => [
-            ...prev,
-            {
-              id,
-              role: "assistant",
-              content: "",
-              isStreaming: true,
-              createdAt: Date.now(),
-            },
-          ]);
-          setIsStreaming(true);
-        }
-        buffer.current.parts.push(ev.text);
-        const combined = buffer.current.parts.join("");
-        const targetId = buffer.current.messageId;
-        setMessages((prev) =>
-          prev.map((m) => (m.id === targetId ? { ...m, content: combined } : m)),
-        );
+        const chunk = ev.text;
+        setIsStreaming(true);
+        setMessages((prev) => {
+          // Reuse an in-flight assistant placeholder (typically created by
+          // ``reasoning_delta``) so the answer renders below its own
+          // thinking trace instead of in a parallel row.
+          const adopted = !buffer.current ? findActiveAssistantPlaceholder(prev) : null;
+          let targetId: string;
+          let next: UIMessage[];
+          if (buffer.current) {
+            targetId = buffer.current.messageId;
+            next = prev;
+          } else if (adopted) {
+            targetId = adopted;
+            buffer.current = { messageId: targetId, parts: [] };
+            next = prev;
+          } else {
+            targetId = crypto.randomUUID();
+            buffer.current = { messageId: targetId, parts: [] };
+            next = [
+              ...prev,
+              {
+                id: targetId,
+                role: "assistant",
+                content: "",
+                isStreaming: true,
+                createdAt: Date.now(),
+              },
+            ];
+          }
+          buffer.current.parts.push(chunk);
+          const combined = buffer.current.parts.join("");
+          return next.map((m) =>
+            m.id === targetId ? { ...m, content: combined, isStreaming: true } : m,
+          );
+        });
         return;
       }
 
@@ -159,6 +250,21 @@ export function useNanobotStream(
         return;
       }
 
+      if (ev.event === "reasoning_delta") {
+        if (suppressStreamUntilTurnEndRef.current) return;
+        const chunk = ev.text;
+        if (!chunk) return;
+        setMessages((prev) => attachReasoningChunk(prev, chunk));
+        setIsStreaming(true);
+        return;
+      }
+
+      if (ev.event === "reasoning_end") {
+        if (suppressStreamUntilTurnEndRef.current) return;
+        setMessages((prev) => closeReasoningStream(prev));
+        return;
+      }
+
       if (ev.event === "turn_end") {
         // Definitive signal that the turn is fully complete.  Cancel any
         // pending debounce timer and stop the loading indicator immediately.
@@ -187,37 +293,13 @@ export function useNanobotStream(
         ) {
           return;
         }
-        // Model reasoning rides its own channel: stash it on the next
-        // assistant turn so the bubble renders it as a subordinate trace.
-        // If the assistant message hasn't materialized yet (typical, since
-        // reasoning fires before tool calls/answers), park it on a sentinel
-        // pending row that the next assistant message absorbs.
+        // Back-compat: a legacy ``kind: "reasoning"`` message (no streaming
+        // partner) is treated as one complete delta + immediate end so the
+        // bubble renders identically to the streaming path.
         if (ev.kind === "reasoning") {
           const line = ev.text;
           if (!line) return;
-          setMessages((prev) => {
-            for (let i = prev.length - 1; i >= 0; i -= 1) {
-              const candidate = prev[i];
-              if (candidate.role === "assistant" && candidate.kind !== "trace") {
-                const merged: UIMessage = {
-                  ...candidate,
-                  reasoning: [...(candidate.reasoning ?? []), line],
-                };
-                return [...prev.slice(0, i), merged, ...prev.slice(i + 1)];
-              }
-            }
-            return [
-              ...prev,
-              {
-                id: crypto.randomUUID(),
-                role: "assistant",
-                content: "",
-                isStreaming: true,
-                reasoning: [line],
-                createdAt: Date.now(),
-              },
-            ];
-          });
+          setMessages((prev) => closeReasoningStream(attachReasoningChunk(prev, line)));
           return;
         }
         // Intermediate agent breadcrumbs (tool-call hints, raw progress).
diff --git a/webui/src/i18n/locales/en/common.json b/webui/src/i18n/locales/en/common.json
index 1f6eb7b54..e82a8f5b7 100644
--- a/webui/src/i18n/locales/en/common.json
+++ b/webui/src/i18n/locales/en/common.json
@@ -333,6 +333,7 @@
     "toolSingle": "Using a tool",
     "toolMany": "Used {{count}} tools",
     "reasoning": "Thinking",
+    "reasoningStreaming": "Thinking…",
     "imageAttachment": "Image attachment",
     "copyReply": "Copy reply",
     "copiedReply": "Copied reply"
diff --git a/webui/src/i18n/locales/zh-CN/common.json b/webui/src/i18n/locales/zh-CN/common.json
index 662a5f7bd..18d4b5e16 100644
--- a/webui/src/i18n/locales/zh-CN/common.json
+++ b/webui/src/i18n/locales/zh-CN/common.json
@@ -320,7 +320,8 @@
     "assistantTyping": "助手正在输入",
     "toolSingle": "正在使用工具",
     "toolMany": "已使用 {{count}} 个工具",
-    "reasoning": "思考中",
+    "reasoning": "思考过程",
+    "reasoningStreaming": "正在思考…",
     "imageAttachment": "图片附件",
     "copyReply": "复制回复",
     "copiedReply": "已复制回复"
diff --git a/webui/src/lib/types.ts b/webui/src/lib/types.ts
index 0338b75f3..25c317753 100644
--- a/webui/src/lib/types.ts
+++ b/webui/src/lib/types.ts
@@ -44,10 +44,13 @@ export interface UIMessage {
   images?: UIImage[];
   /** Signed or local UI-renderable media attachments. */
   media?: UIMediaAttachment[];
-  /** Assistant turn: model reasoning / thinking content collected from
-   * `kind: "reasoning"` frames. Each entry is one emit cycle, joined with
-   * blank lines on render. */
-  reasoning?: string[];
+  /** Assistant turn: accumulated model reasoning / thinking text. Built up
+   * incrementally from ``reasoning_delta`` frames; finalized when
+   * ``reasoning_end`` arrives. */
+  reasoning?: string;
+  /** True while ``reasoning_delta`` frames are still arriving for this turn.
+   * Drives the shimmer header on ``ReasoningBubble``. */
+  reasoningStreaming?: boolean;
 }
 
 export interface ChatSummary {
@@ -158,6 +161,17 @@ export type InboundEvent =
       chat_id: string;
       stream_id?: string;
     }
+  | {
+      event: "reasoning_delta";
+      chat_id: string;
+      text: string;
+      stream_id?: string;
+    }
+  | {
+      event: "reasoning_end";
+      chat_id: string;
+      stream_id?: string;
+    }
   | {
       event: "runtime_model_updated";
       model_name: string;
diff --git a/webui/src/tests/message-bubble.test.tsx b/webui/src/tests/message-bubble.test.tsx
index 77608b121..29c40a3b8 100644
--- a/webui/src/tests/message-bubble.test.tsx
+++ b/webui/src/tests/message-bubble.test.tsx
@@ -103,37 +103,41 @@ describe("MessageBubble", () => {
     expect(container.querySelector("video[controls]")).toBeInTheDocument();
   });
 
-  it("surfaces reasoning content above the assistant answer when provided", () => {
+  it("auto-expands the reasoning trace while streaming with a shimmer header", () => {
     const message: UIMessage = {
-      id: "a-reasoning",
+      id: "a-reasoning-streaming",
+      role: "assistant",
+      content: "",
+      createdAt: Date.now(),
+      reasoning: "Step 1: parse intent. Step 2: compute.",
+      reasoningStreaming: true,
+    };
+
+    const { container } = render(<MessageBubble message={message} />);
+
+    expect(screen.getByText("Thinking…")).toBeInTheDocument();
+    expect(screen.getByText(/Step 1: parse intent\./)).toBeInTheDocument();
+    expect(container.querySelector(".reasoning-shimmer")).toBeInTheDocument();
+  });
+
+  it("collapses the reasoning section by default once streaming ends", () => {
+    const message: UIMessage = {
+      id: "a-reasoning-done",
       role: "assistant",
       content: "The answer is 42.",
       createdAt: Date.now(),
-      reasoning: ["Step 1: parse intent.", "Step 2: compute."],
+      reasoning: "hidden until expanded",
+      reasoningStreaming: false,
     };
 
     render(<MessageBubble message={message} />);
 
     expect(screen.getByText("Thinking")).toBeInTheDocument();
-    expect(screen.getByText(/Step 1: parse intent\./)).toBeInTheDocument();
-    expect(screen.getByText(/Step 2: compute\./)).toBeInTheDocument();
     expect(screen.getByText("The answer is 42.")).toBeInTheDocument();
-  });
+    expect(screen.queryByText("hidden until expanded")).not.toBeInTheDocument();
 
-  it("collapses the reasoning section when toggled", () => {
-    const message: UIMessage = {
-      id: "a-reasoning-collapse",
-      role: "assistant",
-      content: "done",
-      createdAt: Date.now(),
-      reasoning: ["hidden after toggle"],
-    };
-
-    render(<MessageBubble message={message} />);
-
-    expect(screen.getByText("hidden after toggle")).toBeInTheDocument();
     fireEvent.click(screen.getByRole("button", { name: /thinking/i }));
-    expect(screen.queryByText("hidden after toggle")).not.toBeInTheDocument();
+    expect(screen.getByText("hidden until expanded")).toBeInTheDocument();
   });
 
   it("renders assistant image media as a larger generated result", () => {
diff --git a/webui/src/tests/useNanobotStream.test.tsx b/webui/src/tests/useNanobotStream.test.tsx
index 7fb94063c..145d36c1c 100644
--- a/webui/src/tests/useNanobotStream.test.tsx
+++ b/webui/src/tests/useNanobotStream.test.tsx
@@ -113,7 +113,7 @@ describe("useNanobotStream", () => {
     expect(result.current.messages[1].kind).toBeUndefined();
   });
 
-  it("parks reasoning frames on a placeholder assistant message until the answer arrives", () => {
+  it("accumulates reasoning_delta chunks on a placeholder until reasoning_end", () => {
     const fake = fakeClient();
     const { result } = renderHook(() => useNanobotStream("chat-r", EMPTY_MESSAGES), {
       wrapper: wrap(fake.client),
@@ -121,28 +121,31 @@ describe("useNanobotStream", () => {
 
     act(() => {
       fake.emit("chat-r", {
-        event: "message",
+        event: "reasoning_delta",
         chat_id: "chat-r",
-        text: "Let me think step by step.",
-        kind: "reasoning",
+        text: "Let me think ",
       });
       fake.emit("chat-r", {
-        event: "message",
+        event: "reasoning_delta",
         chat_id: "chat-r",
-        text: "First, decompose the request.",
-        kind: "reasoning",
+        text: "step by step.",
       });
     });
 
     expect(result.current.messages).toHaveLength(1);
     expect(result.current.messages[0].role).toBe("assistant");
-    expect(result.current.messages[0].reasoning).toEqual([
-      "Let me think step by step.",
-      "First, decompose the request.",
-    ]);
+    expect(result.current.messages[0].reasoning).toBe("Let me think step by step.");
+    expect(result.current.messages[0].reasoningStreaming).toBe(true);
+
+    act(() => {
+      fake.emit("chat-r", { event: "reasoning_end", chat_id: "chat-r" });
+    });
+
+    expect(result.current.messages[0].reasoningStreaming).toBe(false);
+    expect(result.current.messages[0].reasoning).toBe("Let me think step by step.");
   });
 
-  it("attaches reasoning to the latest assistant turn rather than spawning a new one", () => {
+  it("absorbs a streaming reasoning placeholder into the answer turn that follows", () => {
     const fake = fakeClient();
     const { result } = renderHook(() => useNanobotStream("chat-r2", EMPTY_MESSAGES), {
       wrapper: wrap(fake.client),
@@ -150,24 +153,26 @@ describe("useNanobotStream", () => {
 
     act(() => {
       fake.emit("chat-r2", {
-        event: "message",
+        event: "reasoning_delta",
+        chat_id: "chat-r2",
+        text: "Plan first.",
+      });
+      fake.emit("chat-r2", { event: "reasoning_end", chat_id: "chat-r2" });
+      fake.emit("chat-r2", {
+        event: "delta",
         chat_id: "chat-r2",
         text: "The answer is 42.",
       });
-      fake.emit("chat-r2", {
-        event: "message",
-        chat_id: "chat-r2",
-        text: "Reasoning surfaced post-hoc.",
-        kind: "reasoning",
-      });
+      fake.emit("chat-r2", { event: "stream_end", chat_id: "chat-r2" });
     });
 
     expect(result.current.messages).toHaveLength(1);
     expect(result.current.messages[0].content).toBe("The answer is 42.");
-    expect(result.current.messages[0].reasoning).toEqual(["Reasoning surfaced post-hoc."]);
+    expect(result.current.messages[0].reasoning).toBe("Plan first.");
+    expect(result.current.messages[0].reasoningStreaming).toBe(false);
   });
 
-  it("ignores empty reasoning frames", () => {
+  it("ignores empty reasoning_delta frames", () => {
     const fake = fakeClient();
     const { result } = renderHook(() => useNanobotStream("chat-r3", EMPTY_MESSAGES), {
       wrapper: wrap(fake.client),
@@ -175,16 +180,35 @@ describe("useNanobotStream", () => {
 
     act(() => {
       fake.emit("chat-r3", {
-        event: "message",
+        event: "reasoning_delta",
         chat_id: "chat-r3",
         text: "",
-        kind: "reasoning",
       });
     });
 
     expect(result.current.messages).toHaveLength(0);
   });
 
+  it("treats legacy kind=reasoning messages as a complete delta + end pair", () => {
+    const fake = fakeClient();
+    const { result } = renderHook(() => useNanobotStream("chat-r4", EMPTY_MESSAGES), {
+      wrapper: wrap(fake.client),
+    });
+
+    act(() => {
+      fake.emit("chat-r4", {
+        event: "message",
+        chat_id: "chat-r4",
+        text: "one-shot reasoning",
+        kind: "reasoning",
+      });
+    });
+
+    expect(result.current.messages).toHaveLength(1);
+    expect(result.current.messages[0].reasoning).toBe("one-shot reasoning");
+    expect(result.current.messages[0].reasoningStreaming).toBe(false);
+  });
+
   it("attaches assistant media_urls to complete messages", () => {
     const fake = fakeClient();
     const { result } = renderHook(() => useNanobotStream("chat-m", EMPTY_MESSAGES), {

From 9829cf66d2530d3eb41722cf29404824557fa589 Mon Sep 17 00:00:00 2001
From: Xubin Ren <xubinrencs@gmail.com>
Date: Wed, 13 May 2026 07:20:36 +0000
Subject: [PATCH 07/17] fix(webui): keep late reasoning attached above the
 answer

Some providers only surface structured `reasoning_content` after answer
text has already streamed. The WebUI was treating those late
`reasoning_delta` frames as a fresh assistant placeholder, so the
Thinking bubble rendered below the already-visible answer.

Attach late reasoning back to the active assistant turn instead. The
bubble still renders above the message content, preserving the expected
Thinking -> answer order even when the provider protocol delivers the
reasoning post-hoc. Add a regression test for an answer stream followed
by late reasoning_delta/reasoning_end frames.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 webui/src/hooks/useNanobotStream.ts       | 16 +++++++++----
 webui/src/tests/useNanobotStream.test.tsx | 29 +++++++++++++++++++++++
 2 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/webui/src/hooks/useNanobotStream.ts b/webui/src/hooks/useNanobotStream.ts
index 60736b393..8e83b9eb2 100644
--- a/webui/src/hooks/useNanobotStream.ts
+++ b/webui/src/hooks/useNanobotStream.ts
@@ -21,17 +21,23 @@ interface StreamBuffer {
 /**
  * Append a reasoning chunk to the last open reasoning stream in ``prev``.
  *
- * Lookup rule: find the most recent assistant turn that is either still
- * streaming reasoning (``reasoningStreaming``) or has no answer text yet.
- * Anything else starts a fresh streaming placeholder so a new turn's
- * reasoning never bleeds into the previous answer.
+ * Lookup rule: prefer the most recent assistant turn in the active UI tail.
+ * Most providers emit reasoning before answer text, but some only expose
+ * ``reasoning_content`` after the answer stream completes. In that post-hoc
+ * case the reasoning still belongs to the same assistant turn and must render
+ * above the answer, not as a new row below it.
  */
 function attachReasoningChunk(prev: UIMessage[], chunk: string): UIMessage[] {
   for (let i = prev.length - 1; i >= 0; i -= 1) {
     const candidate = prev[i];
     if (candidate.role !== "assistant" || candidate.kind === "trace") continue;
     const hasAnswer = candidate.content.length > 0;
-    if (candidate.reasoningStreaming || (!hasAnswer && candidate.reasoning !== undefined)) {
+    if (
+      candidate.reasoningStreaming
+      || candidate.reasoning !== undefined
+      || hasAnswer
+      || candidate.isStreaming
+    ) {
       const merged: UIMessage = {
         ...candidate,
         reasoning: (candidate.reasoning ?? "") + chunk,
diff --git a/webui/src/tests/useNanobotStream.test.tsx b/webui/src/tests/useNanobotStream.test.tsx
index 145d36c1c..f621437fd 100644
--- a/webui/src/tests/useNanobotStream.test.tsx
+++ b/webui/src/tests/useNanobotStream.test.tsx
@@ -209,6 +209,35 @@ describe("useNanobotStream", () => {
     expect(result.current.messages[0].reasoningStreaming).toBe(false);
   });
 
+  it("attaches post-hoc reasoning to the same assistant turn above the answer", () => {
+    const fake = fakeClient();
+    const { result } = renderHook(() => useNanobotStream("chat-r5", EMPTY_MESSAGES), {
+      wrapper: wrap(fake.client),
+    });
+
+    act(() => {
+      fake.emit("chat-r5", {
+        event: "delta",
+        chat_id: "chat-r5",
+        text: "hi~",
+      });
+      fake.emit("chat-r5", { event: "stream_end", chat_id: "chat-r5" });
+      fake.emit("chat-r5", {
+        event: "reasoning_delta",
+        chat_id: "chat-r5",
+        text: "This reasoning arrived after the answer stream.",
+      });
+      fake.emit("chat-r5", { event: "reasoning_end", chat_id: "chat-r5" });
+    });
+
+    expect(result.current.messages).toHaveLength(1);
+    expect(result.current.messages[0].content).toBe("hi~");
+    expect(result.current.messages[0].reasoning).toBe(
+      "This reasoning arrived after the answer stream.",
+    );
+    expect(result.current.messages[0].reasoningStreaming).toBe(false);
+  });
+
   it("attaches assistant media_urls to complete messages", () => {
     const fake = fakeClient();
     const { result } = renderHook(() => useNanobotStream("chat-m", EMPTY_MESSAGES), {

From 0033a8a1852df30b3fdb3c8f7f093659e8b443a3 Mon Sep 17 00:00:00 2001
From: Xubin Ren <xubinrencs@gmail.com>
Date: Wed, 13 May 2026 07:28:54 +0000
Subject: [PATCH 08/17] fix(webui): keep reasoning scoped to the current user
 turn

The post-hoc reasoning fix allowed late reasoning frames to attach back to
the nearest assistant message, but the scan crossed a newer user message.
That made the next turn's Thinking bubble render above the previous
assistant reply.

Treat the latest user message as a hard boundary: reasoning after it must
start a new assistant placeholder and can no longer attach to earlier
assistant turns. Add a regression covering previous assistant -> new user
-> reasoning_delta.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 webui/src/hooks/useNanobotStream.ts       |  3 ++
 webui/src/tests/useNanobotStream.test.tsx | 38 +++++++++++++++++++++++
 2 files changed, 41 insertions(+)

diff --git a/webui/src/hooks/useNanobotStream.ts b/webui/src/hooks/useNanobotStream.ts
index 8e83b9eb2..d2a229730 100644
--- a/webui/src/hooks/useNanobotStream.ts
+++ b/webui/src/hooks/useNanobotStream.ts
@@ -30,6 +30,9 @@ interface StreamBuffer {
 function attachReasoningChunk(prev: UIMessage[], chunk: string): UIMessage[] {
   for (let i = prev.length - 1; i >= 0; i -= 1) {
     const candidate = prev[i];
+    // A user turn is a hard boundary: reasoning after it belongs to the new
+    // assistant turn, never to an earlier assistant reply.
+    if (candidate.role === "user") break;
     if (candidate.role !== "assistant" || candidate.kind === "trace") continue;
     const hasAnswer = candidate.content.length > 0;
     if (
diff --git a/webui/src/tests/useNanobotStream.test.tsx b/webui/src/tests/useNanobotStream.test.tsx
index f621437fd..41e6ca3cf 100644
--- a/webui/src/tests/useNanobotStream.test.tsx
+++ b/webui/src/tests/useNanobotStream.test.tsx
@@ -238,6 +238,44 @@ describe("useNanobotStream", () => {
     expect(result.current.messages[0].reasoningStreaming).toBe(false);
   });
 
+  it("does not attach a new turn's reasoning across the latest user boundary", () => {
+    const fake = fakeClient();
+    const initialMessages = [
+      {
+        id: "a-prev",
+        role: "assistant" as const,
+        content: "Previous answer.",
+        reasoning: "Previous thought.",
+        createdAt: Date.now(),
+      },
+      {
+        id: "u-next",
+        role: "user" as const,
+        content: "Next question",
+        createdAt: Date.now(),
+      },
+    ];
+    const { result } = renderHook(
+      () => useNanobotStream("chat-r6", initialMessages),
+      { wrapper: wrap(fake.client) },
+    );
+
+    act(() => {
+      fake.emit("chat-r6", {
+        event: "reasoning_delta",
+        chat_id: "chat-r6",
+        text: "New turn thinking.",
+      });
+    });
+
+    expect(result.current.messages).toHaveLength(3);
+    expect(result.current.messages[0].reasoning).toBe("Previous thought.");
+    expect(result.current.messages[2].role).toBe("assistant");
+    expect(result.current.messages[2].content).toBe("");
+    expect(result.current.messages[2].reasoning).toBe("New turn thinking.");
+    expect(result.current.messages[2].reasoningStreaming).toBe(true);
+  });
+
   it("attaches assistant media_urls to complete messages", () => {
     const fake = fakeClient();
     const { result } = renderHook(() => useNanobotStream("chat-m", EMPTY_MESSAGES), {

From 278affc25e461b6235708798ab9dd5ec946ae064 Mon Sep 17 00:00:00 2001
From: Xubin Ren <xubinrencs@gmail.com>
Date: Wed, 13 May 2026 07:33:52 +0000
Subject: [PATCH 09/17] fix(webui): hydrate reasoning and tool traces from
 history

Live reasoning/tool frames were rendering correctly, but refreshing the
WebUI replayed only role/content/media from `/api/sessions/:key/messages`.
Assistant `reasoning_content` / `thinking_blocks` and `tool_calls` were
already persisted by the backend and returned by the history endpoint, but
useSessionHistory discarded them.

Hydrate persisted assistant reasoning into `UIMessage.reasoning` and
reconstruct assistant tool calls as `kind: "trace"` rows so the replayed
thread keeps the same Thinking bubble and Used tools block as the live
stream. Tool result rows remain hidden from the conversation view to avoid
replaying raw tool output as chat text.

Add regression coverage for both persisted reasoning and historical
tool-call trace hydration.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 webui/src/hooks/useSessions.ts       | 66 +++++++++++++++++++--
 webui/src/lib/api.ts                 |  2 +
 webui/src/tests/useSessions.test.tsx | 86 ++++++++++++++++++++++++++++
 3 files changed, 149 insertions(+), 5 deletions(-)

diff --git a/webui/src/hooks/useSessions.ts b/webui/src/hooks/useSessions.ts
index e05e16a20..d1be437b7 100644
--- a/webui/src/hooks/useSessions.ts
+++ b/webui/src/hooks/useSessions.ts
@@ -14,6 +14,48 @@ import type { ChatSummary, UIMessage } from "@/lib/types";
 
 const EMPTY_MESSAGES: UIMessage[] = [];
 
+type HistoryMessage = Awaited<ReturnType<typeof fetchSessionMessages>>["messages"][number];
+
+function reasoningFromHistory(message: HistoryMessage): string | undefined {
+  if (typeof message.reasoning_content === "string" && message.reasoning_content.trim()) {
+    return message.reasoning_content;
+  }
+  if (!Array.isArray(message.thinking_blocks)) return undefined;
+  const parts = message.thinking_blocks
+    .map((block) => {
+      if (!block || typeof block !== "object") return "";
+      const thinking = (block as { thinking?: unknown }).thinking;
+      return typeof thinking === "string" ? thinking.trim() : "";
+    })
+    .filter(Boolean);
+  return parts.length > 0 ? parts.join("\n\n") : undefined;
+}
+
+function formatToolCallTrace(call: unknown): string | null {
+  if (!call || typeof call !== "object") return null;
+  const item = call as {
+    name?: unknown;
+    function?: { name?: unknown; arguments?: unknown };
+  };
+  const name =
+    typeof item.function?.name === "string"
+      ? item.function.name
+      : typeof item.name === "string"
+        ? item.name
+        : "";
+  if (!name) return null;
+  const args = item.function?.arguments;
+  if (typeof args === "string" && args.trim()) return `${name}(${args})`;
+  return `${name}()`;
+}
+
+function toolTracesFromHistory(message: HistoryMessage): string[] {
+  if (!Array.isArray(message.tool_calls)) return [];
+  return message.tool_calls
+    .map(formatToolCallTrace)
+    .filter((trace): trace is string => !!trace);
+}
+
 /** Sidebar state: fetches the full session list and exposes create / delete actions. */
 export function useSessions(): {
   sessions: ChatSummary[];
@@ -143,14 +185,28 @@ export function useSessionHistory(key: string | null): {
             m.role === "user" && media?.every((item) => item.kind === "image")
               ? media.map((item) => ({ url: item.url, name: item.name }))
               : undefined;
+          const row: UIMessage = {
+            id: `hist-${idx}`,
+            role: m.role,
+            content: m.content,
+            createdAt: m.timestamp ? Date.parse(m.timestamp) : Date.now(),
+            ...(images ? { images } : {}),
+            ...(media ? { media } : {}),
+            ...(m.role === "assistant" && reasoningFromHistory(m)
+              ? { reasoning: reasoningFromHistory(m), reasoningStreaming: false }
+              : {}),
+          };
+          const traces = m.role === "assistant" ? toolTracesFromHistory(m) : [];
+          if (traces.length === 0) return [row];
           return [
+            ...(row.content.trim() || row.reasoning || row.media?.length ? [row] : []),
             {
-              id: `hist-${idx}`,
-              role: m.role,
-              content: m.content,
+              id: `hist-${idx}-tools`,
+              role: "tool" as const,
+              kind: "trace" as const,
+              content: traces[traces.length - 1],
+              traces,
               createdAt: m.timestamp ? Date.parse(m.timestamp) : Date.now(),
-              ...(images ? { images } : {}),
-              ...(media ? { media } : {}),
             },
           ];
         });
diff --git a/webui/src/lib/api.ts b/webui/src/lib/api.ts
index 23a8c2a67..c27ebd3d6 100644
--- a/webui/src/lib/api.ts
+++ b/webui/src/lib/api.ts
@@ -89,6 +89,8 @@ export async function fetchSessionMessages(
     content: string;
     timestamp?: string;
     tool_calls?: unknown;
+    reasoning_content?: string | null;
+    thinking_blocks?: unknown;
     tool_call_id?: string;
     name?: string;
     /** Present on ``user`` turns that attached images. Paths have already
diff --git a/webui/src/tests/useSessions.test.tsx b/webui/src/tests/useSessions.test.tsx
index 4805c6567..988b97252 100644
--- a/webui/src/tests/useSessions.test.tsx
+++ b/webui/src/tests/useSessions.test.tsx
@@ -170,6 +170,92 @@ describe("useSessions", () => {
     ]);
   });
 
+  it("hydrates persisted assistant reasoning into the replayed message", async () => {
+    vi.mocked(api.fetchSessionMessages).mockResolvedValue({
+      key: "websocket:chat-reasoning",
+      created_at: "2026-04-20T10:00:00Z",
+      updated_at: "2026-04-20T10:05:00Z",
+      messages: [
+        {
+          role: "assistant",
+          content: "final answer",
+          timestamp: "2026-04-20T10:00:01Z",
+          reasoning_content: "hidden but persisted reasoning",
+        },
+      ],
+    });
+
+    const { result } = renderHook(() => useSessionHistory("websocket:chat-reasoning"), {
+      wrapper: wrap(fakeClient()),
+    });
+
+    await waitFor(() => expect(result.current.loading).toBe(false));
+
+    expect(result.current.messages).toHaveLength(1);
+    expect(result.current.messages[0].role).toBe("assistant");
+    expect(result.current.messages[0].content).toBe("final answer");
+    expect(result.current.messages[0].reasoning).toBe("hidden but persisted reasoning");
+    expect(result.current.messages[0].reasoningStreaming).toBe(false);
+  });
+
+  it("hydrates historical assistant tool calls into a replay trace row", async () => {
+    vi.mocked(api.fetchSessionMessages).mockResolvedValue({
+      key: "websocket:chat-tools",
+      created_at: "2026-04-20T10:00:00Z",
+      updated_at: "2026-04-20T10:05:00Z",
+      messages: [
+        {
+          role: "user",
+          content: "research this",
+          timestamp: "2026-04-20T10:00:00Z",
+        },
+        {
+          role: "assistant",
+          content: "",
+          timestamp: "2026-04-20T10:00:01Z",
+          tool_calls: [
+            {
+              id: "call-1",
+              type: "function",
+              function: { name: "web_search", arguments: "{\"query\":\"agents\"}" },
+            },
+            {
+              id: "call-2",
+              type: "function",
+              function: { name: "web_fetch", arguments: "{\"url\":\"https://example.com\"}" },
+            },
+          ],
+        },
+        {
+          role: "tool",
+          content: "tool output that should not render directly",
+          timestamp: "2026-04-20T10:00:02Z",
+          tool_call_id: "call-1",
+        },
+        {
+          role: "assistant",
+          content: "summary",
+          timestamp: "2026-04-20T10:00:03Z",
+        },
+      ],
+    });
+
+    const { result } = renderHook(() => useSessionHistory("websocket:chat-tools"), {
+      wrapper: wrap(fakeClient()),
+    });
+
+    await waitFor(() => expect(result.current.loading).toBe(false));
+
+    expect(result.current.messages.map((m) => m.role)).toEqual(["user", "tool", "assistant"]);
+    const trace = result.current.messages[1];
+    expect(trace.kind).toBe("trace");
+    expect(trace.traces).toEqual([
+      "web_search({\"query\":\"agents\"})",
+      "web_fetch({\"url\":\"https://example.com\"})",
+    ]);
+    expect(result.current.messages[2].content).toBe("summary");
+  });
+
   it("flags history with trailing assistant tool calls as still pending", async () => {
     vi.mocked(api.fetchSessionMessages).mockResolvedValue({
       key: "websocket:chat-pending",

From 521aaa5ecfb1a65f1f7d203ad1913575734028d1 Mon Sep 17 00:00:00 2001
From: Xubin Ren <xubinrencs@gmail.com>
Date: Wed, 13 May 2026 07:49:44 +0000
Subject: [PATCH 10/17] fix(webui): split reasoning at tool trace boundaries

Live rendering merged reasoning chunks by scanning backward to the latest
assistant row. That fixed late reasoning, but the scan skipped trace rows,
so reasoning after a tool call crossed the Used tools block and attached to
the previous assistant iteration. Refresh looked correct because persisted
history reconstructs assistant/tool boundaries.

Treat trace rows as hard phase boundaries, just like user messages. A
reasoning_delta after Used tools now starts a fresh assistant placeholder,
so live rendering matches replay: Thinking -> Used tools -> Thinking ->
Used tools / answer.

Add a regression for reasoning_delta -> reasoning_end -> tool_hint ->
reasoning_delta.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 webui/src/hooks/useNanobotStream.ts       |  6 +++-
 webui/src/tests/useNanobotStream.test.tsx | 39 +++++++++++++++++++++++
 2 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/webui/src/hooks/useNanobotStream.ts b/webui/src/hooks/useNanobotStream.ts
index d2a229730..10f1e2400 100644
--- a/webui/src/hooks/useNanobotStream.ts
+++ b/webui/src/hooks/useNanobotStream.ts
@@ -33,7 +33,11 @@ function attachReasoningChunk(prev: UIMessage[], chunk: string): UIMessage[] {
     // A user turn is a hard boundary: reasoning after it belongs to the new
     // assistant turn, never to an earlier assistant reply.
     if (candidate.role === "user") break;
-    if (candidate.role !== "assistant" || candidate.kind === "trace") continue;
+    // A trace row (e.g. Used tools) is also a phase boundary. Reasoning after
+    // tools belongs to the next assistant iteration, not the assistant turn
+    // that produced those tool calls.
+    if (candidate.kind === "trace") break;
+    if (candidate.role !== "assistant") continue;
     const hasAnswer = candidate.content.length > 0;
     if (
       candidate.reasoningStreaming
diff --git a/webui/src/tests/useNanobotStream.test.tsx b/webui/src/tests/useNanobotStream.test.tsx
index 41e6ca3cf..0aa069cfb 100644
--- a/webui/src/tests/useNanobotStream.test.tsx
+++ b/webui/src/tests/useNanobotStream.test.tsx
@@ -276,6 +276,45 @@ describe("useNanobotStream", () => {
     expect(result.current.messages[2].reasoningStreaming).toBe(true);
   });
 
+  it("does not attach reasoning across a tool trace boundary", () => {
+    const fake = fakeClient();
+    const { result } = renderHook(() => useNanobotStream("chat-r7", EMPTY_MESSAGES), {
+      wrapper: wrap(fake.client),
+    });
+
+    act(() => {
+      fake.emit("chat-r7", {
+        event: "reasoning_delta",
+        chat_id: "chat-r7",
+        text: "First reasoning.",
+      });
+      fake.emit("chat-r7", { event: "reasoning_end", chat_id: "chat-r7" });
+      fake.emit("chat-r7", {
+        event: "message",
+        chat_id: "chat-r7",
+        text: "web_search({\"query\":\"OpenClaw\"})",
+        kind: "tool_hint",
+      });
+      fake.emit("chat-r7", {
+        event: "reasoning_delta",
+        chat_id: "chat-r7",
+        text: "Second reasoning.",
+      });
+    });
+
+    expect(result.current.messages).toHaveLength(3);
+    expect(result.current.messages.map((m) => m.kind ?? "message")).toEqual([
+      "message",
+      "trace",
+      "message",
+    ]);
+    expect(result.current.messages[0].reasoning).toBe("First reasoning.");
+    expect(result.current.messages[1].traces).toEqual([
+      "web_search({\"query\":\"OpenClaw\"})",
+    ]);
+    expect(result.current.messages[2].reasoning).toBe("Second reasoning.");
+  });
+
   it("attaches assistant media_urls to complete messages", () => {
     const fake = fakeClient();
     const { result } = renderHook(() => useNanobotStream("chat-m", EMPTY_MESSAGES), {

From c7ec5d3b75bac7cc667abb702d808c901843e865 Mon Sep 17 00:00:00 2001
From: Xubin Ren <xubinrencs@gmail.com>
Date: Wed, 13 May 2026 07:58:24 +0000
Subject: [PATCH 11/17] fix(webui): align thinking and tool trace affordances

Tool trace groups are supporting details, so default them to collapsed.
Match the Thinking bubble's expanded body to the tool trace affordance by
using the same grouped header and animated fade/slide body treatment.

Update MessageBubble tests to assert tool traces start collapsed and expand
on click.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 webui/src/components/MessageBubble.tsx  | 11 ++++++-----
 webui/src/tests/message-bubble.test.tsx |  7 ++++---
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/webui/src/components/MessageBubble.tsx b/webui/src/components/MessageBubble.tsx
index 9002ad500..abf85f663 100644
--- a/webui/src/components/MessageBubble.tsx
+++ b/webui/src/components/MessageBubble.tsx
@@ -386,14 +386,14 @@ interface TraceGroupProps {
 
 /**
  * Collapsible group of tool-call / progress breadcrumbs. Defaults to
- * expanded for discoverability; a single click on the header folds the
- * group down to a one-line summary so it never dominates the thread.
+ * collapsed because tool traces are supporting evidence, not the answer.
+ * A single click expands the exact calls when the user wants details.
  */
 function TraceGroup({ message, animClass }: TraceGroupProps) {
   const { t } = useTranslation();
   const lines = message.traces ?? [message.content];
   const count = lines.length;
-  const [open, setOpen] = useState(true);
+  const [open, setOpen] = useState(false);
   return (
     <div className={cn("w-full", animClass)}>
       <button
@@ -471,7 +471,7 @@ function ReasoningBubble({ text, streaming }: ReasoningBubbleProps) {
         type="button"
         onClick={onToggle}
         className={cn(
-          "flex w-full items-center gap-2 rounded-md px-2 py-1.5",
+          "group flex w-full items-center gap-2 rounded-md px-2 py-1.5",
           "text-xs text-muted-foreground transition-colors hover:bg-muted/45",
           streaming && "reasoning-shimmer",
         )}
@@ -498,7 +498,8 @@ function ReasoningBubble({ text, streaming }: ReasoningBubbleProps) {
       {open && text.length > 0 && (
         <div
           className={cn(
-            "mt-1 whitespace-pre-wrap break-words border-l border-muted-foreground/20 pl-3",
+            "mt-1 space-y-0.5 whitespace-pre-wrap break-words border-l border-muted-foreground/20 pl-3",
+            "animate-in fade-in-0 slide-in-from-top-1 duration-200",
             "text-[12.5px] italic leading-relaxed text-muted-foreground/85",
           )}
         >
diff --git a/webui/src/tests/message-bubble.test.tsx b/webui/src/tests/message-bubble.test.tsx
index 29c40a3b8..33b7ac05f 100644
--- a/webui/src/tests/message-bubble.test.tsx
+++ b/webui/src/tests/message-bubble.test.tsx
@@ -72,11 +72,12 @@ describe("MessageBubble", () => {
     render(<MessageBubble message={message} />);
     const toggle = screen.getByRole("button", { name: /used 2 tools/i });
 
-    expect(screen.getByText('weather("get")')).toBeInTheDocument();
-    expect(screen.getByText('search "hk weather"')).toBeInTheDocument();
+    expect(screen.queryByText('weather("get")')).not.toBeInTheDocument();
+    expect(screen.queryByText('search "hk weather"')).not.toBeInTheDocument();
 
     fireEvent.click(toggle);
-    expect(screen.queryByText('weather("get")')).not.toBeInTheDocument();
+    expect(screen.getByText('weather("get")')).toBeInTheDocument();
+    expect(screen.getByText('search "hk weather"')).toBeInTheDocument();
   });
 
   it("renders video media as an inline player", () => {

From 82ba63e148f35492d6f425f9765e99c82dd9b8e2 Mon Sep 17 00:00:00 2001
From: Xubin Ren <xubinrencs@gmail.com>
Date: Wed, 13 May 2026 08:05:34 +0000
Subject: [PATCH 12/17] fix(webui): compact spacing between auxiliary trace
 rows

Thinking and Used tools are both auxiliary trace rows, but the thread list
was applying the same large gap used between full chat turns. That made
alternating Thinking / Used tools sequences look uneven and too airy.

Move row spacing from a fixed flex gap to per-row margins: full chat turns
keep mt-5, while consecutive auxiliary rows use mt-2. Add coverage for
Thinking -> Used tools -> Thinking spacing.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../src/components/thread/ThreadMessages.tsx  | 29 +++++++++--
 webui/src/tests/thread-messages.test.tsx      | 52 +++++++++++++++++++
 2 files changed, 77 insertions(+), 4 deletions(-)
 create mode 100644 webui/src/tests/thread-messages.test.tsx

diff --git a/webui/src/components/thread/ThreadMessages.tsx b/webui/src/components/thread/ThreadMessages.tsx
index 1ef5c864b..3d3d068f3 100644
--- a/webui/src/components/thread/ThreadMessages.tsx
+++ b/webui/src/components/thread/ThreadMessages.tsx
@@ -1,4 +1,5 @@
 import { MessageBubble } from "@/components/MessageBubble";
+import { cn } from "@/lib/utils";
 import type { UIMessage } from "@/lib/types";
 
 interface ThreadMessagesProps {
@@ -7,10 +8,30 @@ interface ThreadMessagesProps {
 
 export function ThreadMessages({ messages }: ThreadMessagesProps) {
   return (
-    <div className="flex w-full flex-col gap-5">
-      {messages.map((message) => (
-        <MessageBubble key={message.id} message={message} />
-      ))}
+    <div className="flex w-full flex-col">
+      {messages.map((message, index) => {
+        const prev = messages[index - 1];
+        const compact = isAuxiliaryRow(message) && prev && isAuxiliaryRow(prev);
+        return (
+          <div
+            key={message.id}
+            className={cn(index > 0 && (compact ? "mt-2" : "mt-5"))}
+          >
+            <MessageBubble message={message} />
+          </div>
+        );
+      })}
     </div>
   );
 }
+
+function isAuxiliaryRow(message: UIMessage): boolean {
+  return (
+    message.kind === "trace"
+    || (
+      message.role === "assistant"
+      && message.content.trim().length === 0
+      && (!!message.reasoning || !!message.reasoningStreaming)
+    )
+  );
+}
diff --git a/webui/src/tests/thread-messages.test.tsx b/webui/src/tests/thread-messages.test.tsx
new file mode 100644
index 000000000..710b86298
--- /dev/null
+++ b/webui/src/tests/thread-messages.test.tsx
@@ -0,0 +1,52 @@
+import { render } from "@testing-library/react";
+import { describe, expect, it } from "vitest";
+
+import { ThreadMessages } from "@/components/thread/ThreadMessages";
+import type { UIMessage } from "@/lib/types";
+
+describe("ThreadMessages", () => {
+  it("uses compact spacing between consecutive auxiliary rows", () => {
+    const messages: UIMessage[] = [
+      {
+        id: "r1",
+        role: "assistant",
+        content: "",
+        reasoning: "thinking",
+        reasoningStreaming: false,
+        isStreaming: true,
+        createdAt: Date.now(),
+      },
+      {
+        id: "t1",
+        role: "tool",
+        kind: "trace",
+        content: "search()",
+        traces: ["search()"],
+        createdAt: Date.now(),
+      },
+      {
+        id: "r2",
+        role: "assistant",
+        content: "",
+        reasoning: "more thinking",
+        reasoningStreaming: false,
+        isStreaming: true,
+        createdAt: Date.now(),
+      },
+      {
+        id: "a1",
+        role: "assistant",
+        content: "final answer",
+        createdAt: Date.now(),
+      },
+    ];
+
+    const { container } = render(<ThreadMessages messages={messages} />);
+    const rows = Array.from(container.firstElementChild?.children ?? []);
+
+    ["mt-2", "mt-5"].forEach((cls) => expect(rows[0]).not.toHaveClass(cls));
+    expect(rows[1]).toHaveClass("mt-2");
+    expect(rows[2]).toHaveClass("mt-2");
+    expect(rows[3]).toHaveClass("mt-5");
+  });
+});

From 321c565ec490573550cfbc4bef2a66a20df28778 Mon Sep 17 00:00:00 2001
From: Xubin Ren <xubinrencs@gmail.com>
Date: Wed, 13 May 2026 08:12:44 +0000
Subject: [PATCH 13/17] fix(webui): normalize thinking trace row box model

Thinking and Used tools are both auxiliary rows, but Thinking still carried
an internal mb-2 even when it was standalone. That made collapsed Thinking
rows visually taller than tool trace rows despite the shared thread spacing.

Only add the extra bottom margin when a Thinking bubble has answer content
below it in the same assistant message. Standalone Thinking rows now share
the same outer box model as Used tools. Tests lock both standalone and
answer-backed cases.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 webui/src/components/MessageBubble.tsx  | 12 +++++++++---
 webui/src/tests/message-bubble.test.tsx |  2 ++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/webui/src/components/MessageBubble.tsx b/webui/src/components/MessageBubble.tsx
index abf85f663..bd1d8c93b 100644
--- a/webui/src/components/MessageBubble.tsx
+++ b/webui/src/components/MessageBubble.tsx
@@ -92,7 +92,7 @@ export function MessageBubble({ message }: MessageBubbleProps) {
   return (
     <div className={cn("w-full text-[15px]", baseAnim)} style={{ lineHeight: "var(--cjk-line-height)" }}>
       {hasReasoning ? (
-        <ReasoningBubble text={reasoning} streaming={reasoningStreaming} />
+        <ReasoningBubble text={reasoning} streaming={reasoningStreaming} hasBodyBelow={!empty} />
       ) : null}
       {empty && message.isStreaming && !hasReasoning ? (
         <TypingDots />
@@ -443,6 +443,7 @@ function TraceGroup({ message, animClass }: TraceGroupProps) {
 interface ReasoningBubbleProps {
   text: string;
   streaming: boolean;
+  hasBodyBelow: boolean;
 }
 
 /**
@@ -456,7 +457,7 @@ interface ReasoningBubbleProps {
  *     the user can re-expand to inspect the chain of thought. The local
  *     toggle persists once the user interacts.
  */
-function ReasoningBubble({ text, streaming }: ReasoningBubbleProps) {
+function ReasoningBubble({ text, streaming, hasBodyBelow }: ReasoningBubbleProps) {
   const { t } = useTranslation();
   const [userToggled, setUserToggled] = useState(false);
   const [openLocal, setOpenLocal] = useState(true);
@@ -466,7 +467,12 @@ function ReasoningBubble({ text, streaming }: ReasoningBubbleProps) {
     setOpenLocal((v) => (userToggled ? !v : !open));
   };
   return (
-    <div className="mb-2 w-full animate-in fade-in-0 slide-in-from-top-1 duration-200">
+    <div
+      className={cn(
+        "w-full animate-in fade-in-0 slide-in-from-top-1 duration-200",
+        hasBodyBelow && "mb-2",
+      )}
+    >
       <button
         type="button"
         onClick={onToggle}
diff --git a/webui/src/tests/message-bubble.test.tsx b/webui/src/tests/message-bubble.test.tsx
index 33b7ac05f..4f5d504dd 100644
--- a/webui/src/tests/message-bubble.test.tsx
+++ b/webui/src/tests/message-bubble.test.tsx
@@ -119,6 +119,7 @@ describe("MessageBubble", () => {
     expect(screen.getByText("Thinking…")).toBeInTheDocument();
     expect(screen.getByText(/Step 1: parse intent\./)).toBeInTheDocument();
     expect(container.querySelector(".reasoning-shimmer")).toBeInTheDocument();
+    expect(screen.getByRole("button", { name: /thinking/i }).parentElement).not.toHaveClass("mb-2");
   });
 
   it("collapses the reasoning section by default once streaming ends", () => {
@@ -136,6 +137,7 @@ describe("MessageBubble", () => {
     expect(screen.getByText("Thinking")).toBeInTheDocument();
     expect(screen.getByText("The answer is 42.")).toBeInTheDocument();
     expect(screen.queryByText("hidden until expanded")).not.toBeInTheDocument();
+    expect(screen.getByRole("button", { name: /thinking/i }).parentElement).toHaveClass("mb-2");
 
     fireEvent.click(screen.getByRole("button", { name: /thinking/i }));
     expect(screen.getByText("hidden until expanded")).toBeInTheDocument();

From 9d50f1b9336994c3a3222f0143cbd3fd796c6252 Mon Sep 17 00:00:00 2001
From: Xubin Ren <xubinrencs@gmail.com>
Date: Wed, 13 May 2026 08:47:34 +0000
Subject: [PATCH 14/17] feat: polish trace delivery and slash menu UX

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 docs/channel-plugin-guide.md                  | 109 +++++++++++
 nanobot/agent/loop.py                         | 177 +----------------
 nanobot/agent/progress_hook.py                | 178 ++++++++++++++++++
 .../src/components/thread/ThreadComposer.tsx  |  99 +++++++++-
 webui/src/components/thread/ThreadShell.tsx   |   1 +
 webui/src/tests/thread-composer.test.tsx      |  84 ++++++++-
 webui/src/tests/thread-shell.test.tsx         |  14 +-
 7 files changed, 482 insertions(+), 180 deletions(-)
 create mode 100644 nanobot/agent/progress_hook.py

diff --git a/docs/channel-plugin-guide.md b/docs/channel-plugin-guide.md
index d37a92883..da668c9ee 100644
--- a/docs/channel-plugin-guide.md
+++ b/docs/channel-plugin-guide.md
@@ -238,6 +238,9 @@ nanobot channels login <channel_name> --force  # re-authenticate
 | `supports_streaming` (property) | `True` when config has `"streaming": true` **and** subclass overrides `send_delta()`. |
 | `is_running` | Returns `self._running`. |
 | `login(force=False)` | Perform interactive login (e.g. QR code scan). Returns `True` if already authenticated or login succeeds. Override in subclasses that support interactive login. |
+| `send_reasoning_delta(chat_id, delta, metadata?)` | Optional hook for streamed model reasoning/thinking content. Default is no-op. |
+| `send_reasoning_end(chat_id, metadata?)` | Optional hook marking the end of a reasoning block. Default is no-op. |
+| `send_reasoning(msg)` | Optional one-shot reasoning fallback. Default translates to `send_reasoning_delta()` + `send_reasoning_end()`. |
 
 ### Optional (streaming)
 
@@ -350,6 +353,112 @@ When `streaming` is `false` (default) or omitted, only `send()` is called — no
 | `async send_delta(chat_id, delta, metadata?)` | Override to handle streaming chunks. No-op by default. |
 | `supports_streaming` (property) | Returns `True` when config has `streaming: true` **and** subclass overrides `send_delta`. |
 
+## Progress, Tool Hints, and Reasoning
+
+Besides normal assistant text, nanobot can emit low-emphasis trace blocks. These are intended for UI affordances like status rows, collapsible "used tools" groups, or reasoning/thinking blocks. Platforms that do not have a good place for them can ignore them safely.
+
+### Progress and Tool Hints
+
+Progress and tool hints arrive through the normal `send(msg)` path. Check `msg.metadata` before rendering:
+
+```python
+async def send(self, msg: OutboundMessage) -> None:
+    meta = msg.metadata or {}
+
+    if meta.get("_tool_hint"):
+        # A short tool breadcrumb, e.g. read_file("config.json")
+        await self._send_trace(msg.chat_id, msg.content, kind="tool")
+        return
+
+    if meta.get("_progress"):
+        # Generic non-final status, e.g. "Thinking..." or "Running command..."
+        await self._send_trace(msg.chat_id, msg.content, kind="progress")
+        return
+
+    await self._send_message(msg.chat_id, msg.content, media=msg.media)
+```
+
+Tool hints are off by default for most channels. Users can enable them globally or per channel:
+
+```json
+{
+  "channels": {
+    "sendToolHints": true,
+    "webhook": {
+      "enabled": true,
+      "sendToolHints": true
+    }
+  }
+}
+```
+
+### Reasoning Blocks
+
+Reasoning is delivered through dedicated optional hooks, not `send()`. Override `send_reasoning_delta()` and `send_reasoning_end()` if your platform can show model reasoning as a subdued/collapsible block. The default implementation is a no-op, so unsupported channels simply drop reasoning content.
+
+```python
+class WebhookChannel(BaseChannel):
+    name = "webhook"
+    display_name = "Webhook"
+
+    def __init__(self, config: Any, bus: MessageBus):
+        if isinstance(config, dict):
+            config = WebhookConfig(**config)
+        super().__init__(config, bus)
+        self._reasoning_buffers: dict[str, str] = {}
+
+    async def send_reasoning_delta(
+        self,
+        chat_id: str,
+        delta: str,
+        metadata: dict[str, Any] | None = None,
+    ) -> None:
+        meta = metadata or {}
+        stream_id = str(meta.get("_stream_id") or chat_id)
+        self._reasoning_buffers[stream_id] = self._reasoning_buffers.get(stream_id, "") + delta
+        await self._update_reasoning_block(chat_id, self._reasoning_buffers[stream_id], final=False)
+
+    async def send_reasoning_end(
+        self,
+        chat_id: str,
+        metadata: dict[str, Any] | None = None,
+    ) -> None:
+        meta = metadata or {}
+        stream_id = str(meta.get("_stream_id") or chat_id)
+        text = self._reasoning_buffers.pop(stream_id, "")
+        if text:
+            await self._update_reasoning_block(chat_id, text, final=True)
+```
+
+**Reasoning metadata flags:**
+
+| Flag | Meaning |
+|------|---------|
+| `_reasoning_delta: True` | A reasoning/thinking chunk; `delta` contains the new text. |
+| `_reasoning_end: True` | The current reasoning block is complete; `delta` is empty. |
+| `_reasoning: True` | Legacy one-shot reasoning. `BaseChannel.send_reasoning()` converts it to delta + end. |
+| `_stream_id` | Stable id for this assistant turn/segment. Use it to key buffers instead of only `chat_id`. |
+
+Reasoning visibility is controlled by `showReasoning` globally or per channel:
+
+```json
+{
+  "channels": {
+    "showReasoning": true,
+    "webhook": {
+      "enabled": true,
+      "showReasoning": true
+    }
+  }
+}
+```
+
+Recommended rendering:
+
+- Render tool hints and progress as trace/status UI, not as normal assistant replies.
+- Render reasoning with lower visual emphasis and collapse it after completion when the platform supports that.
+- Keep reasoning separate from final answer text. A final answer still arrives through `send()` or `send_delta()`.
+
 ## Config
 
 ### Why Pydantic model is required
diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index 7897f89dd..9bfce39fb 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -4,7 +4,6 @@ from __future__ import annotations
 
 import asyncio
 import dataclasses
-import json
 import os
 import time
 from contextlib import AsyncExitStack, nullcontext, suppress
@@ -15,11 +14,12 @@ from typing import TYPE_CHECKING, Any, Awaitable, Callable
 
 from loguru import logger
 
+from nanobot.agent import model_presets as preset_helpers
 from nanobot.agent.autocompact import AutoCompact
 from nanobot.agent.context import ContextBuilder
-from nanobot.agent.hook import AgentHook, AgentHookContext, CompositeHook
+from nanobot.agent.hook import AgentHook, CompositeHook
 from nanobot.agent.memory import Consolidator, Dream
-from nanobot.agent import model_presets as preset_helpers
+from nanobot.agent.progress_hook import AgentProgressHook
 from nanobot.agent.runner import _MAX_INJECTIONS_PER_TURN, AgentRunner, AgentRunSpec
 from nanobot.agent.subagent import SubagentManager
 from nanobot.agent.tools.file_state import FileStateStore, bind_file_states, reset_file_states
@@ -35,15 +35,9 @@ from nanobot.providers.factory import ProviderSnapshot
 from nanobot.session.manager import Session, SessionManager
 from nanobot.utils.artifacts import generated_image_paths_from_messages
 from nanobot.utils.document import extract_documents
-from nanobot.utils.helpers import IncrementalThinkExtractor, image_placeholder_text
+from nanobot.utils.helpers import image_placeholder_text
 from nanobot.utils.helpers import truncate_text as truncate_text_fn
 from nanobot.utils.image_generation_intent import image_generation_prompt
-from nanobot.utils.progress_events import (
-    build_tool_event_finish_payloads,
-    build_tool_event_start_payload,
-    invoke_on_progress,
-    on_progress_accepts_tool_events,
-)
 from nanobot.utils.runtime import EMPTY_FINAL_RESPONSE_MESSAGE
 from nanobot.utils.webui_titles import mark_webui_session, maybe_generate_webui_title_after_turn
 
@@ -59,148 +53,6 @@ if TYPE_CHECKING:
 UNIFIED_SESSION_KEY = "unified:default"
 
 
-class _LoopHook(AgentHook):
-    """Core hook for the main loop."""
-
-    def __init__(
-        self,
-        agent_loop: AgentLoop,
-        on_progress: Callable[..., Awaitable[None]] | None = None,
-        on_stream: Callable[[str], Awaitable[None]] | None = None,
-        on_stream_end: Callable[..., Awaitable[None]] | None = None,
-        *,
-        channel: str = "cli",
-        chat_id: str = "direct",
-        message_id: str | None = None,
-        metadata: dict[str, Any] | None = None,
-        session_key: str | None = None,
-    ) -> None:
-        super().__init__(reraise=True)
-        self._loop = agent_loop
-        self._on_progress = on_progress
-        self._on_stream = on_stream
-        self._on_stream_end = on_stream_end
-        self._channel = channel
-        self._chat_id = chat_id
-        self._message_id = message_id
-        self._metadata = metadata or {}
-        self._session_key = session_key
-        self._stream_buf = ""
-        self._think_extractor = IncrementalThinkExtractor()
-        self._reasoning_open = False
-
-    def wants_streaming(self) -> bool:
-        return self._on_stream is not None
-
-    async def on_stream(self, context: AgentHookContext, delta: str) -> None:
-        from nanobot.utils.helpers import strip_think
-
-        prev_clean = strip_think(self._stream_buf)
-        self._stream_buf += delta
-        new_clean = strip_think(self._stream_buf)
-        incremental = new_clean[len(prev_clean) :]
-
-        if await self._think_extractor.feed(self._stream_buf, self.emit_reasoning):
-            context.streamed_reasoning = True
-
-        if incremental:
-            # Answer text has started — close any open reasoning segment so
-            # the UI can lock the bubble before the answer renders below it.
-            await self.emit_reasoning_end()
-            if self._on_stream:
-                await self._on_stream(incremental)
-
-    async def on_stream_end(self, context: AgentHookContext, *, resuming: bool) -> None:
-        await self.emit_reasoning_end()
-        if self._on_stream_end:
-            await self._on_stream_end(resuming=resuming)
-        self._stream_buf = ""
-        self._think_extractor.reset()
-
-    async def before_iteration(self, context: AgentHookContext) -> None:
-        self._loop._current_iteration = context.iteration
-        logger.debug(
-            "Starting agent loop iteration {} for session {}",
-            context.iteration,
-            self._session_key,
-        )
-
-    async def before_execute_tools(self, context: AgentHookContext) -> None:
-        if self._on_progress:
-            if not self._on_stream and not context.streamed_content:
-                thought = self._loop._strip_think(
-                    context.response.content if context.response else None
-                )
-                if thought:
-                    await self._on_progress(thought)
-            tool_hint = self._loop._strip_think(self._loop._tool_hint(context.tool_calls))
-            tool_events = [build_tool_event_start_payload(tc) for tc in context.tool_calls]
-            await invoke_on_progress(
-                self._on_progress,
-                tool_hint,
-                tool_hint=True,
-                tool_events=tool_events,
-            )
-        for tc in context.tool_calls:
-            args_str = json.dumps(tc.arguments, ensure_ascii=False)
-            logger.info("Tool call: {}({})", tc.name, args_str[:200])
-        self._loop._set_tool_context(
-            self._channel,
-            self._chat_id,
-            self._message_id,
-            self._metadata,
-            session_key=self._session_key,
-        )
-
-    async def emit_reasoning(self, reasoning_content: str | None) -> None:
-        """Publish a reasoning chunk; channel plugins decide whether to render.
-
-        Each call is one delta in a streaming session. ``emit_reasoning_end``
-        closes the segment. The loop is intentionally not the gate:
-        ``ChannelsConfig.show_reasoning`` is a default that ``ChannelManager``
-        and ``BaseChannel.send_reasoning_delta`` consult per channel — a
-        channel without a low-emphasis UI primitive keeps the base no-op
-        and the content drops at the dispatch boundary.
-        """
-        if self._on_progress and reasoning_content:
-            self._reasoning_open = True
-            await self._on_progress(reasoning_content, reasoning=True)
-
-    async def emit_reasoning_end(self) -> None:
-        """Close the current reasoning stream segment, if any was open."""
-        if self._reasoning_open and self._on_progress:
-            self._reasoning_open = False
-            await self._on_progress("", reasoning_end=True)
-        else:
-            self._reasoning_open = False
-
-    async def after_iteration(self, context: AgentHookContext) -> None:
-        if (
-            self._on_progress
-            and context.tool_calls
-            and context.tool_events
-            and on_progress_accepts_tool_events(self._on_progress)
-        ):
-            tool_events = build_tool_event_finish_payloads(context)
-            if tool_events:
-                await invoke_on_progress(
-                    self._on_progress,
-                    "",
-                    tool_hint=False,
-                    tool_events=tool_events,
-                )
-        u = context.usage or {}
-        logger.debug(
-            "LLM usage: prompt={} completion={} cached={}",
-            u.get("prompt_tokens", 0),
-            u.get("completion_tokens", 0),
-            u.get("cached_tokens", 0),
-        )
-
-    def finalize_content(self, context: AgentHookContext, content: str | None) -> str | None:
-        return self._loop._strip_think(content)
-
-
 class TurnState(Enum):
     RESTORE = auto()
     COMPACT = auto()
@@ -651,26 +503,11 @@ class AgentLoop:
             if tool and isinstance(tool, ContextAware):
                 tool.set_context(request_ctx)
 
-    @staticmethod
-    def _strip_think(text: str | None) -> str | None:
-        """Remove <think>…</think> blocks that some models embed in content."""
-        if not text:
-            return None
-        from nanobot.utils.helpers import strip_think
-
-        return strip_think(text) or None
-
     @staticmethod
     def _runtime_chat_id(msg: InboundMessage) -> str:
         """Return the chat id shown in runtime metadata for the model."""
         return str(msg.metadata.get("context_chat_id") or msg.chat_id)
 
-    def _tool_hint(self, tool_calls: list) -> str:
-        """Format tool calls as concise hints with smart abbreviation."""
-        from nanobot.utils.tool_hints import format_tool_hints
-
-        return format_tool_hints(tool_calls, max_length=self.tool_hint_max_length)
-
     async def _build_bus_progress_callback(
         self, msg: InboundMessage
     ) -> Callable[..., Awaitable[None]]:
@@ -834,8 +671,7 @@ class AgentLoop:
         """
         self._sync_subagent_runtime_limits()
 
-        loop_hook = _LoopHook(
-            self,
+        loop_hook = AgentProgressHook(
             on_progress=on_progress,
             on_stream=on_stream,
             on_stream_end=on_stream_end,
@@ -844,6 +680,9 @@ class AgentLoop:
             message_id=message_id,
             metadata=metadata,
             session_key=session_key,
+            tool_hint_max_length=self.tool_hint_max_length,
+            set_tool_context=self._set_tool_context,
+            on_iteration=lambda iteration: setattr(self, "_current_iteration", iteration),
         )
         hook: AgentHook = (
             CompositeHook([loop_hook] + self._extra_hooks) if self._extra_hooks else loop_hook
diff --git a/nanobot/agent/progress_hook.py b/nanobot/agent/progress_hook.py
new file mode 100644
index 000000000..a9bf6a1e9
--- /dev/null
+++ b/nanobot/agent/progress_hook.py
@@ -0,0 +1,178 @@
+"""Agent hook that adapts runner events into channel progress UI."""
+
+from __future__ import annotations
+
+import inspect
+import json
+from typing import Any, Awaitable, Callable
+
+from loguru import logger
+
+from nanobot.agent.hook import AgentHook, AgentHookContext
+from nanobot.utils.helpers import IncrementalThinkExtractor, strip_think
+from nanobot.utils.progress_events import (
+    build_tool_event_finish_payloads,
+    build_tool_event_start_payload,
+    invoke_on_progress,
+    on_progress_accepts_tool_events,
+)
+from nanobot.utils.tool_hints import format_tool_hints
+
+
+class AgentProgressHook(AgentHook):
+    """Translate runner lifecycle events into user-visible progress signals."""
+
+    def __init__(
+        self,
+        on_progress: Callable[..., Awaitable[None]] | None = None,
+        on_stream: Callable[[str], Awaitable[None]] | None = None,
+        on_stream_end: Callable[..., Awaitable[None]] | None = None,
+        *,
+        channel: str = "cli",
+        chat_id: str = "direct",
+        message_id: str | None = None,
+        metadata: dict[str, Any] | None = None,
+        session_key: str | None = None,
+        tool_hint_max_length: int = 40,
+        set_tool_context: Callable[..., None] | None = None,
+        on_iteration: Callable[[int], None] | None = None,
+    ) -> None:
+        super().__init__(reraise=True)
+        self._on_progress = on_progress
+        self._on_stream = on_stream
+        self._on_stream_end = on_stream_end
+        self._channel = channel
+        self._chat_id = chat_id
+        self._message_id = message_id
+        self._metadata = metadata or {}
+        self._session_key = session_key
+        self._tool_hint_max_length = tool_hint_max_length
+        self._set_tool_context = set_tool_context
+        self._on_iteration = on_iteration
+        self._stream_buf = ""
+        self._think_extractor = IncrementalThinkExtractor()
+        self._reasoning_open = False
+
+    def wants_streaming(self) -> bool:
+        return self._on_stream is not None
+
+    @staticmethod
+    def _strip_think(text: str | None) -> str | None:
+        if not text:
+            return None
+        return strip_think(text) or None
+
+    def _tool_hint(self, tool_calls: list[Any]) -> str:
+        return format_tool_hints(tool_calls, max_length=self._tool_hint_max_length)
+
+    @staticmethod
+    def _on_progress_accepts(cb: Callable[..., Any], name: str) -> bool:
+        try:
+            sig = inspect.signature(cb)
+        except (TypeError, ValueError):
+            return False
+        if any(p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values()):
+            return True
+        return name in sig.parameters
+
+    async def on_stream(self, context: AgentHookContext, delta: str) -> None:
+        prev_clean = strip_think(self._stream_buf)
+        self._stream_buf += delta
+        new_clean = strip_think(self._stream_buf)
+        incremental = new_clean[len(prev_clean) :]
+
+        if await self._think_extractor.feed(self._stream_buf, self.emit_reasoning):
+            context.streamed_reasoning = True
+
+        if incremental:
+            # Answer text has started; close the reasoning segment so the UI can
+            # lock the bubble before the answer renders below it.
+            await self.emit_reasoning_end()
+            if self._on_stream:
+                await self._on_stream(incremental)
+
+    async def on_stream_end(self, context: AgentHookContext, *, resuming: bool) -> None:
+        await self.emit_reasoning_end()
+        if self._on_stream_end:
+            await self._on_stream_end(resuming=resuming)
+        self._stream_buf = ""
+        self._think_extractor.reset()
+
+    async def before_iteration(self, context: AgentHookContext) -> None:
+        if self._on_iteration:
+            self._on_iteration(context.iteration)
+        logger.debug(
+            "Starting agent loop iteration {} for session {}",
+            context.iteration,
+            self._session_key,
+        )
+
+    async def before_execute_tools(self, context: AgentHookContext) -> None:
+        if self._on_progress:
+            if not self._on_stream and not context.streamed_content:
+                thought = self._strip_think(context.response.content if context.response else None)
+                if thought:
+                    await self._on_progress(thought)
+            tool_hint = self._strip_think(self._tool_hint(context.tool_calls))
+            tool_events = [build_tool_event_start_payload(tc) for tc in context.tool_calls]
+            await invoke_on_progress(
+                self._on_progress,
+                tool_hint,
+                tool_hint=True,
+                tool_events=tool_events,
+            )
+        for tc in context.tool_calls:
+            args_str = json.dumps(tc.arguments, ensure_ascii=False)
+            logger.info("Tool call: {}({})", tc.name, args_str[:200])
+        if self._set_tool_context:
+            self._set_tool_context(
+                self._channel,
+                self._chat_id,
+                self._message_id,
+                self._metadata,
+                session_key=self._session_key,
+            )
+
+    async def emit_reasoning(self, reasoning_content: str | None) -> None:
+        """Publish a reasoning chunk; channel plugins decide whether to render."""
+        if (
+            self._on_progress
+            and reasoning_content
+            and self._on_progress_accepts(self._on_progress, "reasoning")
+        ):
+            self._reasoning_open = True
+            await self._on_progress(reasoning_content, reasoning=True)
+
+    async def emit_reasoning_end(self) -> None:
+        """Close the open reasoning segment if the callback accepts ``reasoning_end``."""
+        was_open, self._reasoning_open = self._reasoning_open, False
+        if not (was_open and self._on_progress):
+            return
+        if self._on_progress_accepts(self._on_progress, "reasoning_end"):
+            await self._on_progress("", reasoning_end=True)
+
+    async def after_iteration(self, context: AgentHookContext) -> None:
+        if (
+            self._on_progress
+            and context.tool_calls
+            and context.tool_events
+            and on_progress_accepts_tool_events(self._on_progress)
+        ):
+            tool_events = build_tool_event_finish_payloads(context)
+            if tool_events:
+                await invoke_on_progress(
+                    self._on_progress,
+                    "",
+                    tool_hint=False,
+                    tool_events=tool_events,
+                )
+        u = context.usage or {}
+        logger.debug(
+            "LLM usage: prompt={} completion={} cached={}",
+            u.get("prompt_tokens", 0),
+            u.get("completion_tokens", 0),
+            u.get("cached_tokens", 0),
+        )
+
+    def finalize_content(self, context: AgentHookContext, content: str | None) -> str | None:
+        return self._strip_think(content)
diff --git a/webui/src/components/thread/ThreadComposer.tsx b/webui/src/components/thread/ThreadComposer.tsx
index 572ac3966..b95a7bbc4 100644
--- a/webui/src/components/thread/ThreadComposer.tsx
+++ b/webui/src/components/thread/ThreadComposer.tsx
@@ -1,6 +1,7 @@
 import {
   useCallback,
   useEffect,
+  useLayoutEffect,
   useMemo,
   useRef,
   useState,
@@ -77,6 +78,17 @@ const COMMAND_ICONS: Record<string, LucideIcon> = {
 type ImageAspectRatio = "auto" | "1:1" | "3:4" | "9:16" | "4:3" | "16:9";
 
 const IMAGE_ASPECT_RATIOS: ImageAspectRatio[] = ["auto", "1:1", "3:4", "9:16", "4:3", "16:9"];
+const SLASH_PALETTE_GAP_PX = 8;
+const SLASH_PALETTE_MAX_HEIGHT_PX = 288;
+const SLASH_PALETTE_MIN_HEIGHT_PX = 144;
+const SLASH_PALETTE_CHROME_PX = 64;
+
+type SlashPalettePlacement = "above" | "below";
+
+interface SlashPaletteLayout {
+  placement: SlashPalettePlacement;
+  maxHeight: number;
+}
 
 function slashCommandI18nKey(command: string): string {
   return command.replace(/^\//, "").replace(/-/g, "_");
@@ -96,6 +108,24 @@ function scrollNearestOverflowParent(target: EventTarget | null, deltaY: number)
   }
 }
 
+function getVisibleBounds(el: HTMLElement): { top: number; bottom: number } {
+  let top = 0;
+  let bottom = window.innerHeight;
+  let parent = el.parentElement;
+
+  while (parent) {
+    const style = window.getComputedStyle(parent);
+    if (/(auto|scroll|hidden|clip)/.test(style.overflowY)) {
+      const rect = parent.getBoundingClientRect();
+      top = Math.max(top, rect.top);
+      bottom = Math.min(bottom, rect.bottom);
+    }
+    parent = parent.parentElement;
+  }
+
+  return { top, bottom };
+}
+
 export function ThreadComposer({
   onSend,
   disabled,
@@ -117,6 +147,7 @@ export function ThreadComposer({
   const [imageAspectRatio, setImageAspectRatio] = useState<ImageAspectRatio>("auto");
   const [aspectMenuOpen, setAspectMenuOpen] = useState(false);
   const textareaRef = useRef<HTMLTextAreaElement>(null);
+  const formRef = useRef<HTMLFormElement>(null);
   const fileInputRef = useRef<HTMLInputElement>(null);
   const aspectControlRef = useRef<HTMLDivElement>(null);
   const chipRefs = useRef(new Map<string, HTMLButtonElement>());
@@ -221,6 +252,10 @@ export function ThreadComposer({
   }, [slashCommands, slashQuery, t]);
 
   const showSlashMenu = filteredSlashCommands.length > 0;
+  const [slashPaletteLayout, setSlashPaletteLayout] = useState<SlashPaletteLayout>({
+    placement: "above",
+    maxHeight: SLASH_PALETTE_MAX_HEIGHT_PX,
+  });
 
   useEffect(() => {
     setSelectedCommandIndex(0);
@@ -232,6 +267,56 @@ export function ThreadComposer({
     }
   }, [filteredSlashCommands.length, selectedCommandIndex]);
 
+  useEffect(() => {
+    if (!showSlashMenu) return;
+
+    const dismissOnPointerDown = (event: PointerEvent) => {
+      const target = event.target;
+      if (target instanceof Node && formRef.current?.contains(target)) return;
+      setSlashMenuDismissed(true);
+    };
+
+    document.addEventListener("pointerdown", dismissOnPointerDown, true);
+    return () => {
+      document.removeEventListener("pointerdown", dismissOnPointerDown, true);
+    };
+  }, [showSlashMenu]);
+
+  useLayoutEffect(() => {
+    if (!showSlashMenu) return;
+
+    const updateLayout = () => {
+      const form = formRef.current;
+      if (!form) return;
+      const rect = form.getBoundingClientRect();
+      if (rect.width === 0 && rect.height === 0) return;
+
+      const bounds = getVisibleBounds(form);
+      const spaceAbove = Math.max(0, rect.top - bounds.top - SLASH_PALETTE_GAP_PX);
+      const spaceBelow = Math.max(0, bounds.bottom - rect.bottom - SLASH_PALETTE_GAP_PX);
+      const placement: SlashPalettePlacement =
+        spaceAbove >= SLASH_PALETTE_MIN_HEIGHT_PX || spaceAbove >= spaceBelow
+          ? "above"
+          : "below";
+      const available = placement === "above" ? spaceAbove : spaceBelow;
+      const maxHeight = Math.min(SLASH_PALETTE_MAX_HEIGHT_PX, available);
+
+      setSlashPaletteLayout((current) =>
+        current.placement === placement && current.maxHeight === maxHeight
+          ? current
+          : { placement, maxHeight },
+      );
+    };
+
+    updateLayout();
+    window.addEventListener("resize", updateLayout);
+    document.addEventListener("scroll", updateLayout, true);
+    return () => {
+      window.removeEventListener("resize", updateLayout);
+      document.removeEventListener("scroll", updateLayout, true);
+    };
+  }, [filteredSlashCommands.length, showSlashMenu]);
+
   useEffect(() => {
     if (!aspectMenuOpen) return;
 
@@ -398,6 +483,7 @@ export function ThreadComposer({
 
   return (
     <form
+      ref={formRef}
       onSubmit={(e) => {
         e.preventDefault();
         submit();
@@ -412,6 +498,7 @@ export function ThreadComposer({
         <SlashCommandPalette
           commands={filteredSlashCommands}
           selectedIndex={selectedCommandIndex}
+          layout={slashPaletteLayout}
           isHero={isHero}
           onHover={setSelectedCommandIndex}
           onChoose={chooseSlashCommand}
@@ -634,6 +721,7 @@ export function ThreadComposer({
 interface SlashCommandPaletteProps {
   commands: SlashCommand[];
   selectedIndex: number;
+  layout: SlashPaletteLayout;
   isHero: boolean;
   onHover: (index: number) => void;
   onChoose: (command: SlashCommand) => void;
@@ -695,17 +783,24 @@ function ImageAspectMenu({
 function SlashCommandPalette({
   commands,
   selectedIndex,
+  layout,
   isHero,
   onHover,
   onChoose,
 }: SlashCommandPaletteProps) {
   const { t } = useTranslation();
+  const listMaxHeight = Math.max(
+    0,
+    layout.maxHeight - SLASH_PALETTE_CHROME_PX,
+  );
   return (
     <div
       role="listbox"
       aria-label={t("thread.composer.slash.ariaLabel")}
+      style={{ maxHeight: layout.maxHeight }}
       className={cn(
-        "absolute bottom-full left-1/2 z-30 mb-2 max-h-[22rem] w-[calc(100%-0.5rem)] -translate-x-1/2 overflow-hidden rounded-[18px] border",
+        "absolute left-1/2 z-30 w-[calc(100%-0.5rem)] -translate-x-1/2 overflow-hidden rounded-[18px] border",
+        layout.placement === "above" ? "bottom-full mb-2" : "top-full mt-2",
         "border-border/65 bg-popover p-1.5 text-popover-foreground shadow-[0_18px_55px_rgba(15,23,42,0.18)]",
         "dark:border-white/10 dark:shadow-[0_22px_55px_rgba(0,0,0,0.45)]",
         isHero ? "max-w-[58rem]" : "max-w-[49.5rem]",
@@ -714,7 +809,7 @@ function SlashCommandPalette({
       <div className="px-2 pb-1 pt-1 text-[11px] font-medium tracking-[0.08em] text-muted-foreground/70">
         {t("thread.composer.slash.label")}
       </div>
-      <div className="max-h-[18rem] overflow-y-auto pr-0.5">
+      <div className="overflow-y-auto pr-0.5" style={{ maxHeight: listMaxHeight }}>
         {commands.map((command, index) => {
           const Icon = COMMAND_ICONS[command.icon] ?? CircleHelp;
           const selected = index === selectedIndex;
diff --git a/webui/src/components/thread/ThreadShell.tsx b/webui/src/components/thread/ThreadShell.tsx
index c5c488de0..0d330c2a9 100644
--- a/webui/src/components/thread/ThreadShell.tsx
+++ b/webui/src/components/thread/ThreadShell.tsx
@@ -260,6 +260,7 @@ export function ThreadShell({
           }
           modelLabel={toModelBadgeLabel(modelName)}
           variant="hero"
+          slashCommands={slashCommands}
           imageMode={heroImageMode}
           onImageModeChange={setHeroImageMode}
         />
diff --git a/webui/src/tests/thread-composer.test.tsx b/webui/src/tests/thread-composer.test.tsx
index 7b147602a..015ff50ad 100644
--- a/webui/src/tests/thread-composer.test.tsx
+++ b/webui/src/tests/thread-composer.test.tsx
@@ -1,5 +1,5 @@
-import { fireEvent, render, screen } from "@testing-library/react";
-import { describe, expect, it, vi } from "vitest";
+import { fireEvent, render, screen, waitFor } from "@testing-library/react";
+import { afterEach, describe, expect, it, vi } from "vitest";
 
 import { ThreadComposer } from "@/components/thread/ThreadComposer";
 import type { SlashCommand } from "@/lib/types";
@@ -19,6 +19,33 @@ const COMMANDS: SlashCommand[] = [
     argHint: "[n]",
   },
 ];
+const ORIGINAL_INNER_HEIGHT = window.innerHeight;
+
+afterEach(() => {
+  vi.restoreAllMocks();
+  Object.defineProperty(window, "innerHeight", {
+    value: ORIGINAL_INNER_HEIGHT,
+    configurable: true,
+  });
+});
+
+function rect(init: Partial<DOMRect>): DOMRect {
+  const top = init.top ?? 0;
+  const left = init.left ?? 0;
+  const width = init.width ?? 0;
+  const height = init.height ?? 0;
+  return {
+    x: init.x ?? left,
+    y: init.y ?? top,
+    top,
+    left,
+    width,
+    height,
+    right: init.right ?? left + width,
+    bottom: init.bottom ?? top + height,
+    toJSON: () => ({}),
+  };
+}
 
 describe("ThreadComposer", () => {
   it("renders a readonly hero model composer when provided", () => {
@@ -74,7 +101,9 @@ describe("ThreadComposer", () => {
     const input = screen.getByLabelText("Message input");
     fireEvent.change(input, { target: { value: "/" } });
 
-    expect(screen.getByRole("listbox", { name: "Slash commands" })).toBeInTheDocument();
+    const palette = screen.getByRole("listbox", { name: "Slash commands" });
+    expect(palette).toBeInTheDocument();
+    expect(palette).toHaveStyle({ maxHeight: "288px" });
     expect(screen.getByRole("option", { name: /\/stop/i })).toHaveAttribute(
       "aria-selected",
       "true",
@@ -92,6 +121,55 @@ describe("ThreadComposer", () => {
     expect(screen.queryByRole("listbox", { name: "Slash commands" })).not.toBeInTheDocument();
   });
 
+  it("opens the slash command palette downward when there is more room below", async () => {
+    vi.spyOn(HTMLFormElement.prototype, "getBoundingClientRect").mockReturnValue(
+      rect({ top: 40, bottom: 160, width: 800, height: 120 }),
+    );
+    Object.defineProperty(window, "innerHeight", {
+      value: 330,
+      configurable: true,
+    });
+    render(
+      <ThreadComposer
+        onSend={vi.fn()}
+        placeholder="Ask anything..."
+        slashCommands={COMMANDS}
+        variant="hero"
+      />,
+    );
+    const input = screen.getByLabelText("Message input");
+
+    fireEvent.change(input, { target: { value: "/" } });
+
+    await waitFor(() => {
+      const palette = screen.getByRole("listbox", { name: "Slash commands" });
+      expect(palette.className).toContain("top-full");
+      expect(palette).toHaveStyle({ maxHeight: "162px" });
+    });
+  });
+
+  it("dismisses the slash command palette on outside click", () => {
+    render(
+      <div>
+        <button type="button">outside</button>
+        <ThreadComposer
+          onSend={vi.fn()}
+          placeholder="Type your message..."
+          slashCommands={COMMANDS}
+        />
+      </div>,
+    );
+
+    fireEvent.change(screen.getByLabelText("Message input"), {
+      target: { value: "/" },
+    });
+    expect(screen.getByRole("listbox", { name: "Slash commands" })).toBeInTheDocument();
+
+    fireEvent.pointerDown(screen.getByRole("button", { name: "outside" }));
+
+    expect(screen.queryByRole("listbox", { name: "Slash commands" })).not.toBeInTheDocument();
+  });
+
   it("sends image generation mode with automatic aspect ratio", () => {
     const onSend = vi.fn();
     render(
diff --git a/webui/src/tests/thread-shell.test.tsx b/webui/src/tests/thread-shell.test.tsx
index 8dd999d6b..f9bf7db0c 100644
--- a/webui/src/tests/thread-shell.test.tsx
+++ b/webui/src/tests/thread-shell.test.tsx
@@ -573,7 +573,7 @@ describe("ThreadShell", () => {
     await waitFor(() => expect(screen.getByText("live assistant reply")).toBeInTheDocument());
   });
 
-  it("does not open slash commands on the blank welcome page", async () => {
+  it("opens slash commands on the blank welcome page", async () => {
     const client = makeClient();
     vi.stubGlobal(
       "fetch",
@@ -583,10 +583,11 @@ describe("ThreadShell", () => {
           return httpJson({
             commands: [
               {
-                command: "/stop",
-                title: "Stop current task",
-                description: "Cancel the active agent turn.",
-                icon: "square",
+                command: "/history",
+                title: "Show conversation history",
+                description: "Print the last N persisted messages.",
+                icon: "history",
+                arg_hint: "[n]",
               },
             ],
           });
@@ -622,7 +623,8 @@ describe("ThreadShell", () => {
       target: { value: "/" },
     });
 
-    expect(screen.queryByRole("listbox", { name: "Slash commands" })).not.toBeInTheDocument();
+    expect(screen.getByRole("listbox", { name: "Slash commands" })).toBeInTheDocument();
+    expect(screen.getByRole("option", { name: /\/history/i })).toBeInTheDocument();
   });
 
   it("switches welcome quick actions when image mode is enabled", async () => {

From 3fab7362624af4bde6ace8ed208e1a2142d0915d Mon Sep 17 00:00:00 2001
From: Xubin Ren <xubinrencs@gmail.com>
Date: Wed, 13 May 2026 09:13:16 +0000
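Subject: [PATCH 15/17] fix(cli): keep trace output under assistant header

Progress and reasoning lines now call StreamRenderer.ensure_header()
before printing, so the assistant header is printed once and trace
output appears beneath it. When the renderer has already printed the
header, the non-streamed response path passes show_header=False to
_print_agent_response() to avoid a second header. pause_spinner() also
stops an active Live view before a trace line is printed, and
"_reasoning_delta" progress frames are now handled as reasoning.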

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 nanobot/cli/commands.py                  | 25 +++++++++++--
 nanobot/cli/stream.py                    | 47 +++++++++++++++++++-----
 tests/cli/test_cli_input.py              | 26 +++++++++++++
 tests/cli/test_interactive_retry_wait.py | 19 ++++++++++
 4 files changed, 103 insertions(+), 14 deletions(-)

diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py
index dd23cb620..e02653bf9 100644
--- a/nanobot/cli/commands.py
+++ b/nanobot/cli/commands.py
@@ -176,13 +176,15 @@ def _print_agent_response(
     response: str,
     render_markdown: bool,
     metadata: dict | None = None,
+    show_header: bool = True,
 ) -> None:
     """Render assistant response with consistent terminal styling."""
     console = _make_console()
     content = response or ""
     body = _response_renderable(content, render_markdown, metadata)
-    console.print()
-    console.print(f"[cyan]{__logo__} nanobot[/cyan]")
+    if show_header:
+        console.print()
+        console.print(f"[cyan]{__logo__} nanobot[/cyan]")
     console.print(body)
     console.print()
 
@@ -235,6 +237,8 @@ def _print_cli_progress_line(text: str, thinking: ThinkingSpinner | None, render
     target = renderer.console if renderer else console
     pause = renderer.pause_spinner() if renderer else (thinking.pause() if thinking else nullcontext())
     with pause:
+        if renderer:
+            renderer.ensure_header()
         target.print(f"  [dim]↳ {text}[/dim]")
 
 
@@ -245,6 +249,8 @@ def _print_cli_reasoning(text: str, thinking: ThinkingSpinner | None, renderer:
     target = renderer.console if renderer else console
     pause = renderer.pause_spinner() if renderer else (thinking.pause() if thinking else nullcontext())
     with pause:
+        if renderer:
+            renderer.ensure_header()
         target.print(f"[dim italic]✻ {text}[/dim italic]")
 
 
@@ -254,6 +260,7 @@ async def _print_interactive_progress_line(text: str, thinking: ThinkingSpinner
         return
     if renderer:
         with renderer.pause_spinner():
+            renderer.ensure_header()
             renderer.console.print(f"  [dim]↳ {text}[/dim]")
     else:
         with thinking.pause() if thinking else nullcontext():
@@ -275,7 +282,7 @@ async def _maybe_print_interactive_progress(
         return False
 
     is_tool_hint = metadata.get("_tool_hint", False)
-    is_reasoning = metadata.get("_reasoning", False)
+    is_reasoning = metadata.get("_reasoning", False) or metadata.get("_reasoning_delta", False)
     if is_reasoning:
         if channels_config and not channels_config.show_reasoning:
             return True
@@ -1118,10 +1125,14 @@ def agent(
             )
             if not renderer.streamed:
                 await renderer.close()
+                print_kwargs: dict[str, Any] = {}
+                if renderer.header_printed:
+                    print_kwargs["show_header"] = False
                 _print_agent_response(
                     response.content if response else "",
                     render_markdown=markdown,
                     metadata=response.metadata if response else None,
+                    **print_kwargs,
                 )
             await agent_loop.close_mcp()
 
@@ -1246,8 +1257,14 @@ def agent(
                             if content and not meta.get("_streamed"):
                                 if renderer:
                                     await renderer.close()
+                                print_kwargs: dict[str, Any] = {}
+                                if renderer and renderer.header_printed:
+                                    print_kwargs["show_header"] = False
                                 _print_agent_response(
-                                    content, render_markdown=markdown, metadata=meta,
+                                    content,
+                                    render_markdown=markdown,
+                                    metadata=meta,
+                                    **print_kwargs,
                                 )
                         elif renderer and not renderer.streamed:
                             await renderer.close()
diff --git a/nanobot/cli/stream.py b/nanobot/cli/stream.py
index 64cb4ed78..382ae9aac 100644
--- a/nanobot/cli/stream.py
+++ b/nanobot/cli/stream.py
@@ -10,6 +10,7 @@ that plagued earlier approaches.
 from __future__ import annotations
 
 import sys
+from contextlib import contextmanager, nullcontext
 
 from rich.console import Console
 from rich.live import Live
@@ -93,6 +94,7 @@ class StreamRenderer:
         self._console = _make_console()
         self._live: Live | None = None
         self._spinner: ThinkingSpinner | None = None
+        self._header_printed = False
         self._start_spinner()
 
     def _renderable(self):
@@ -122,12 +124,41 @@ class StreamRenderer:
         """Expose the Live's console so external print functions can use it."""
         return self._console
 
+    @property
+    def header_printed(self) -> bool:
+        """Whether this turn has already opened the assistant output block."""
+        return self._header_printed
+
+    def ensure_header(self) -> None:
+        """Print the assistant header once, before trace or answer content."""
+        if self._header_printed:
+            return
+        self._stop_spinner()
+        self._console.print()
+        header = f"{self._bot_icon} {self._bot_name}" if self._bot_icon else self._bot_name
+        self._console.print(f"[cyan]{header}[/cyan]")
+        self._header_printed = True
+
     def pause_spinner(self):
-        """Context manager: temporarily stop spinner for clean output."""
-        if self._spinner:
-            return self._spinner.pause()
-        from contextlib import nullcontext
-        return nullcontext()
+        """Context manager: temporarily stop transient output for clean trace lines."""
+        @contextmanager
+        def _pause():
+            live_was_active = self._live is not None
+            if self._live:
+                # Trace/reasoning can arrive after answer streaming has started.
+                # Stop the transient Live view first so it does not leak a raw
+                # partial markdown frame before the trace line.
+                self._live.stop()
+                self._live = None
+            with self._spinner.pause() if self._spinner else nullcontext():
+                yield
+            # If more answer deltas arrive after the trace, on_delta() will
+            # create a fresh Live using the existing buffer. If no deltas arrive,
+            # on_end() prints the final buffered answer once.
+            if live_was_active:
+                return
+
+        return _pause()
 
     async def on_delta(self, delta: str) -> None:
         self.streamed = True
@@ -135,10 +166,7 @@ class StreamRenderer:
         if self._live is None:
             if not self._buf.strip():
                 return
-            self._stop_spinner()
-            self._console.print()
-            header = f"{self._bot_icon} {self._bot_name}" if self._bot_icon else self._bot_name
-            self._console.print(f"[cyan]{header}[/cyan]")
+            self.ensure_header()
             self._live = Live(
                 self._renderable(),
                 console=self._console,
@@ -174,7 +202,6 @@ class StreamRenderer:
 
     def pause(self):
         """Context manager: pause spinner for external output. No-op once streaming has started."""
-        from contextlib import nullcontext
         if self._spinner:
             return self._spinner.pause()
         return nullcontext()
diff --git a/tests/cli/test_cli_input.py b/tests/cli/test_cli_input.py
index 69293f4b8..8b7a79cfc 100644
--- a/tests/cli/test_cli_input.py
+++ b/tests/cli/test_cli_input.py
@@ -1,4 +1,5 @@
 import asyncio
+from contextlib import nullcontext
 from unittest.mock import AsyncMock, MagicMock, call, patch
 
 import pytest
@@ -96,6 +97,31 @@ def test_print_cli_progress_line_pauses_spinner_before_printing():
     assert order == ["start", "stop", "print", "start", "stop"]
 
 
+def test_print_cli_progress_line_opens_renderer_header_before_trace():
+    """Trace lines should appear under the assistant header, not under You."""
+    order: list[str] = []
+    renderer = MagicMock()
+    renderer.console.print.side_effect = lambda *_args, **_kwargs: order.append("print")
+    renderer.ensure_header.side_effect = lambda: order.append("header")
+    renderer.pause_spinner.return_value = nullcontext()
+
+    commands._print_cli_progress_line("tool running", None, renderer)
+
+    assert order == ["header", "print"]
+
+
+def test_print_cli_progress_line_stops_live_before_trace():
+    """A trace line should not leak the current transient Live frame."""
+    mock_live = MagicMock()
+    renderer = stream_mod.StreamRenderer(show_spinner=False)
+    renderer._live = mock_live
+
+    commands._print_cli_progress_line("tool running", None, renderer)
+
+    mock_live.stop.assert_called_once()
+    assert renderer._live is None
+
+
 @pytest.mark.asyncio
 async def test_print_interactive_progress_line_pauses_spinner_before_printing():
     """Interactive progress output should also pause spinner cleanly."""
diff --git a/tests/cli/test_interactive_retry_wait.py b/tests/cli/test_interactive_retry_wait.py
index 7ddef1c48..52c27d2c9 100644
--- a/tests/cli/test_interactive_retry_wait.py
+++ b/tests/cli/test_interactive_retry_wait.py
@@ -50,6 +50,25 @@ async def test_reasoning_displayed_when_show_reasoning_enabled():
     assert calls == ["Let me think about this..."]
 
 
+@pytest.mark.asyncio
+async def test_reasoning_delta_displayed_when_show_reasoning_enabled():
+    """Streamed reasoning delta frames should use the reasoning renderer."""
+    calls: list[str] = []
+    channels_config = SimpleNamespace(
+        send_progress=True, send_tool_hints=False, show_reasoning=True,
+    )
+    msg = SimpleNamespace(
+        content="I should search first.",
+        metadata={"_progress": True, "_reasoning_delta": True},
+    )
+
+    with patch("nanobot.cli.commands._print_cli_reasoning", side_effect=lambda t, th, r=None: calls.append(t)):
+        handled = await commands._maybe_print_interactive_progress(msg, None, channels_config)
+
+    assert handled is True
+    assert calls == ["I should search first."]
+
+
 @pytest.mark.asyncio
 async def test_reasoning_hidden_when_show_reasoning_disabled():
     """Reasoning content should be suppressed when show_reasoning is False."""

From 53831e161199dbfea333e06b6b4202f5e7f67dab Mon Sep 17 00:00:00 2001
From: Xubin Ren <xubinrencs@gmail.com>
Date: Wed, 13 May 2026 09:15:53 +0000
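Subject: [PATCH 16/17] fix(cli): clear thinking spinner before trace output

After ThinkingSpinner stops its status (on exit and on pause), erase
the current terminal line with "\r\x1b[2K" when the console file is a
TTY, so the transient status line is cleared before persistent output
is printed.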

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 nanobot/cli/stream.py       | 13 +++++++++++++
 tests/cli/test_cli_input.py | 18 ++++++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/nanobot/cli/stream.py b/nanobot/cli/stream.py
index 382ae9aac..899950fb6 100644
--- a/nanobot/cli/stream.py
+++ b/nanobot/cli/stream.py
@@ -18,6 +18,16 @@ from rich.markdown import Markdown
 from rich.text import Text
 
 
+def _clear_current_line(console: Console) -> None:
+    """Erase a transient status line before printing persistent output."""
+    file = console.file
+    isatty = getattr(file, "isatty", lambda: False)
+    if not isatty():
+        return
+    file.write("\r\x1b[2K")
+    file.flush()
+
+
 def _make_console() -> Console:
     """Create a Console that emits plain text when stdout is not a TTY.
 
@@ -37,6 +47,7 @@ class ThinkingSpinner:
 
     def __init__(self, console: Console | None = None, bot_name: str = "nanobot"):
         c = console or _make_console()
+        self._console = c
         self._spinner = c.status(f"[dim]{bot_name} is thinking...[/dim]", spinner="dots")
         self._active = False
 
@@ -48,6 +59,7 @@ class ThinkingSpinner:
     def __exit__(self, *exc):
         self._active = False
         self._spinner.stop()
+        _clear_current_line(self._console)
         return False
 
     def pause(self):
@@ -58,6 +70,7 @@ class ThinkingSpinner:
         def _ctx():
             if self._spinner and self._active:
                 self._spinner.stop()
+                _clear_current_line(self._console)
             try:
                 yield
             finally:
diff --git a/tests/cli/test_cli_input.py b/tests/cli/test_cli_input.py
index 8b7a79cfc..3f5619c4f 100644
--- a/tests/cli/test_cli_input.py
+++ b/tests/cli/test_cli_input.py
@@ -1,5 +1,6 @@
 import asyncio
 from contextlib import nullcontext
+from io import StringIO
 from unittest.mock import AsyncMock, MagicMock, call, patch
 
 import pytest
@@ -97,6 +98,23 @@ def test_print_cli_progress_line_pauses_spinner_before_printing():
     assert order == ["start", "stop", "print", "start", "stop"]
 
 
+def test_thinking_spinner_clears_status_line_when_paused():
+    """Stopping the spinner should erase its transient line before output."""
+    stream = StringIO()
+    stream.isatty = lambda: True  # type: ignore[method-assign]
+    mock_console = MagicMock()
+    mock_console.file = stream
+    spinner = MagicMock()
+    mock_console.status.return_value = spinner
+
+    thinking = stream_mod.ThinkingSpinner(console=mock_console)
+    with thinking:
+        with thinking.pause():
+            pass
+
+    assert "\r\x1b[2K" in stream.getvalue()
+
+
 def test_print_cli_progress_line_opens_renderer_header_before_trace():
     """Trace lines should appear under the assistant header, not under You."""
     order: list[str] = []

From 567e95dee63aea426b9620ac894d86d094f3ef16 Mon Sep 17 00:00:00 2001
From: Xubin Ren <xubinrencs@gmail.com>
Date: Wed, 13 May 2026 09:18:59 +0000
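Subject: [PATCH 17/17] fix(cli): stop spinner before resumed answer deltas

ensure_header() returned early once the header had been printed, so it
never reached _stop_spinner(). A turn can print trace rows, then restart
the spinner while tools run; the next answer delta must still stop that
spinner. Call _stop_spinner() unconditionally, before the header check.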

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 nanobot/cli/stream.py       |  7 +++++--
 tests/cli/test_cli_input.py | 18 ++++++++++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/nanobot/cli/stream.py b/nanobot/cli/stream.py
index 899950fb6..24a141cdd 100644
--- a/nanobot/cli/stream.py
+++ b/nanobot/cli/stream.py
@@ -143,10 +143,13 @@ class StreamRenderer:
         return self._header_printed
 
     def ensure_header(self) -> None:
-        """Print the assistant header once, before trace or answer content."""
+        """Stop transient status and print the assistant header once."""
+        # A turn can print trace rows before the final answer, then restart the
+        # spinner while tools run. The next answer delta still needs to stop
+        # that spinner even though the header was already printed.
+        self._stop_spinner()
         if self._header_printed:
             return
-        self._stop_spinner()
         self._console.print()
         header = f"{self._bot_icon} {self._bot_name}" if self._bot_icon else self._bot_name
         self._console.print(f"[cyan]{header}[/cyan]")
diff --git a/tests/cli/test_cli_input.py b/tests/cli/test_cli_input.py
index 3f5619c4f..34046e8d4 100644
--- a/tests/cli/test_cli_input.py
+++ b/tests/cli/test_cli_input.py
@@ -115,6 +115,24 @@ def test_thinking_spinner_clears_status_line_when_paused():
     assert "\r\x1b[2K" in stream.getvalue()
 
 
+def test_stream_renderer_stops_spinner_even_after_header_printed():
+    """A later answer delta must stop the spinner even when header already exists."""
+    stream = StringIO()
+    stream.isatty = lambda: True  # type: ignore[method-assign]
+    mock_console = MagicMock()
+    mock_console.file = stream
+    spinner = MagicMock()
+    mock_console.status.return_value = spinner
+
+    with patch.object(stream_mod, "_make_console", return_value=mock_console):
+        renderer = stream_mod.StreamRenderer(show_spinner=True)
+        renderer._header_printed = True
+        renderer.ensure_header()
+
+    spinner.stop.assert_called_once()
+    assert "\r\x1b[2K" in stream.getvalue()
+
+
 def test_print_cli_progress_line_opens_renderer_header_before_trace():
     """Trace lines should appear under the assistant header, not under You."""
     order: list[str] = []