mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-19 16:12:30 +00:00
Merge PR #3655: feat(reason): display model reasoning content during streaming
feat(reason): display model reasoning content during streaming
This commit is contained in:
commit
79e528119c
@ -238,6 +238,9 @@ nanobot channels login <channel_name> --force # re-authenticate
|
||||
| `supports_streaming` (property) | `True` when config has `"streaming": true` **and** subclass overrides `send_delta()`. |
|
||||
| `is_running` | Returns `self._running`. |
|
||||
| `login(force=False)` | Perform interactive login (e.g. QR code scan). Returns `True` if already authenticated or login succeeds. Override in subclasses that support interactive login. |
|
||||
| `send_reasoning_delta(chat_id, delta, metadata?)` | Optional hook for streamed model reasoning/thinking content. Default is no-op. |
|
||||
| `send_reasoning_end(chat_id, metadata?)` | Optional hook marking the end of a reasoning block. Default is no-op. |
|
||||
| `send_reasoning(msg)` | Optional one-shot reasoning fallback. Default translates to `send_reasoning_delta()` + `send_reasoning_end()`. |
|
||||
|
||||
### Optional (streaming)
|
||||
|
||||
@ -350,6 +353,112 @@ When `streaming` is `false` (default) or omitted, only `send()` is called — no
|
||||
| `async send_delta(chat_id, delta, metadata?)` | Override to handle streaming chunks. No-op by default. |
|
||||
| `supports_streaming` (property) | Returns `True` when config has `streaming: true` **and** subclass overrides `send_delta`. |
|
||||
|
||||
## Progress, Tool Hints, and Reasoning
|
||||
|
||||
Besides normal assistant text, nanobot can emit low-emphasis trace blocks. These are intended for UI affordances like status rows, collapsible "used tools" groups, or reasoning/thinking blocks. Platforms that do not have a good place for them can ignore them safely.
|
||||
|
||||
### Progress and Tool Hints
|
||||
|
||||
Progress and tool hints arrive through the normal `send(msg)` path. Check `msg.metadata` before rendering:
|
||||
|
||||
```python
|
||||
async def send(self, msg: OutboundMessage) -> None:
|
||||
meta = msg.metadata or {}
|
||||
|
||||
if meta.get("_tool_hint"):
|
||||
# A short tool breadcrumb, e.g. read_file("config.json")
|
||||
await self._send_trace(msg.chat_id, msg.content, kind="tool")
|
||||
return
|
||||
|
||||
if meta.get("_progress"):
|
||||
# Generic non-final status, e.g. "Thinking..." or "Running command..."
|
||||
await self._send_trace(msg.chat_id, msg.content, kind="progress")
|
||||
return
|
||||
|
||||
await self._send_message(msg.chat_id, msg.content, media=msg.media)
|
||||
```
|
||||
|
||||
Tool hints are off by default for most channels. Users can enable them globally or per channel:
|
||||
|
||||
```json
|
||||
{
|
||||
"channels": {
|
||||
"sendToolHints": true,
|
||||
"webhook": {
|
||||
"enabled": true,
|
||||
"sendToolHints": true
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Reasoning Blocks
|
||||
|
||||
Reasoning is delivered through dedicated optional hooks, not `send()`. Override `send_reasoning_delta()` and `send_reasoning_end()` if your platform can show model reasoning as a subdued/collapsible block. The default implementation is a no-op, so unsupported channels simply drop reasoning content.
|
||||
|
||||
```python
|
||||
class WebhookChannel(BaseChannel):
|
||||
name = "webhook"
|
||||
display_name = "Webhook"
|
||||
|
||||
def __init__(self, config: Any, bus: MessageBus):
|
||||
if isinstance(config, dict):
|
||||
config = WebhookConfig(**config)
|
||||
super().__init__(config, bus)
|
||||
self._reasoning_buffers: dict[str, str] = {}
|
||||
|
||||
async def send_reasoning_delta(
|
||||
self,
|
||||
chat_id: str,
|
||||
delta: str,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
) -> None:
|
||||
meta = metadata or {}
|
||||
stream_id = str(meta.get("_stream_id") or chat_id)
|
||||
self._reasoning_buffers[stream_id] = self._reasoning_buffers.get(stream_id, "") + delta
|
||||
await self._update_reasoning_block(chat_id, self._reasoning_buffers[stream_id], final=False)
|
||||
|
||||
async def send_reasoning_end(
|
||||
self,
|
||||
chat_id: str,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
) -> None:
|
||||
meta = metadata or {}
|
||||
stream_id = str(meta.get("_stream_id") or chat_id)
|
||||
text = self._reasoning_buffers.pop(stream_id, "")
|
||||
if text:
|
||||
await self._update_reasoning_block(chat_id, text, final=True)
|
||||
```
|
||||
|
||||
**Reasoning metadata flags:**
|
||||
|
||||
| Flag | Meaning |
|
||||
|------|---------|
|
||||
| `_reasoning_delta: True` | A reasoning/thinking chunk; `delta` contains the new text. |
|
||||
| `_reasoning_end: True` | The current reasoning block is complete; `delta` is empty. |
|
||||
| `_reasoning: True` | Legacy one-shot reasoning. `BaseChannel.send_reasoning()` converts it to delta + end. |
|
||||
| `_stream_id` | Stable id for this assistant turn/segment. Use it to key buffers instead of only `chat_id`. |
|
||||
|
||||
Reasoning visibility is controlled by `showReasoning` globally or per channel:
|
||||
|
||||
```json
|
||||
{
|
||||
"channels": {
|
||||
"showReasoning": true,
|
||||
"webhook": {
|
||||
"enabled": true,
|
||||
"showReasoning": true
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Recommended rendering:
|
||||
|
||||
- Render tool hints and progress as trace/status UI, not as normal assistant replies.
|
||||
- Render reasoning with lower visual emphasis and collapse it after completion when the platform supports that.
|
||||
- Keep reasoning separate from final answer text. A final answer still arrives through `send()` or `send_delta()`.
|
||||
|
||||
## Config
|
||||
|
||||
### Why Pydantic model is required
|
||||
|
||||
@ -743,6 +743,7 @@ Global settings that apply to all channels. Configure under the `channels` secti
|
||||
|---------|---------|-------------|
|
||||
| `sendProgress` | `true` | Stream agent's text progress to the channel |
|
||||
| `sendToolHints` | `false` | Stream tool-call hints (e.g. `read_file("…")`) |
|
||||
| `showReasoning` | `true` | Allow channels to surface model reasoning/thinking content (DeepSeek-R1 `reasoning_content`, Anthropic `thinking_blocks`, inline `<think>` tags). Reasoning flows as a dedicated stream with `_reasoning_delta` / `_reasoning_end` markers — channels override `send_reasoning_delta` / `send_reasoning_end` to render in-place updates. Even with `true`, channels without those overrides stay no-op silently. Currently surfaced on CLI and WebSocket/WebUI (italic shimmer header, auto-collapses after the stream ends); Telegram / Slack / Discord / Feishu / WeChat / Matrix keep the base no-op until their bubble UI is adapted. Independent of `sendProgress`. |
|
||||
| `sendMaxRetries` | `3` | Max delivery attempts per outbound message, including the initial send (0-10 configured, minimum 1 actual attempt) |
|
||||
| `transcriptionProvider` | `"groq"` | Voice transcription backend: `"groq"` (free tier, default) or `"openai"`. API key is auto-resolved from the matching provider config. |
|
||||
| `transcriptionLanguage` | `null` | Optional ISO-639-1 language hint for audio transcription, e.g. `"en"`, `"ko"`, `"ja"`. |
|
||||
|
||||
@ -128,6 +128,29 @@ All frames are JSON text. Each message has an `event` field.
|
||||
}
|
||||
```
|
||||
|
||||
**`reasoning_delta`** — incremental model reasoning / thinking chunk for the active assistant turn. Mirrors `delta` but targets the reasoning bubble above the answer rather than the answer body:
|
||||
|
||||
```json
|
||||
{
|
||||
"event": "reasoning_delta",
|
||||
"chat_id": "uuid-v4",
|
||||
"text": "Let me decompose ",
|
||||
"stream_id": "r1"
|
||||
}
|
||||
```
|
||||
|
||||
**`reasoning_end`** — close marker for the active reasoning stream. WebUI uses this to lock the in-place bubble and switch from the shimmer header to a static collapsed state:
|
||||
|
||||
```json
|
||||
{
|
||||
"event": "reasoning_end",
|
||||
"chat_id": "uuid-v4",
|
||||
"stream_id": "r1"
|
||||
}
|
||||
```
|
||||
|
||||
Reasoning frames only flow when the channel's `showReasoning` is `true` (default) and the model returns reasoning content (DeepSeek-R1 / Kimi / MiMo / OpenAI reasoning models, Anthropic extended thinking, or inline `<think>` / `<thought>` tags). Models without reasoning produce zero `reasoning_delta` frames.
|
||||
|
||||
**`runtime_model_updated`** — broadcast when the gateway runtime model changes, for example after `/model <preset>`:
|
||||
|
||||
```json
|
||||
|
||||
@ -22,6 +22,7 @@ class AgentHookContext:
|
||||
tool_results: list[Any] = field(default_factory=list)
|
||||
tool_events: list[dict[str, str]] = field(default_factory=list)
|
||||
streamed_content: bool = False
|
||||
streamed_reasoning: bool = False
|
||||
final_content: str | None = None
|
||||
stop_reason: str | None = None
|
||||
error: str | None = None
|
||||
@ -48,6 +49,17 @@ class AgentHook:
|
||||
async def before_execute_tools(self, context: AgentHookContext) -> None:
|
||||
pass
|
||||
|
||||
async def emit_reasoning(self, reasoning_content: str | None) -> None:
|
||||
pass
|
||||
|
||||
async def emit_reasoning_end(self) -> None:
|
||||
"""Mark the end of an in-flight reasoning stream.
|
||||
|
||||
Hooks that buffer ``emit_reasoning`` chunks (for in-place UI updates)
|
||||
flush and freeze the rendered group here. One-shot hooks ignore.
|
||||
"""
|
||||
pass
|
||||
|
||||
async def after_iteration(self, context: AgentHookContext) -> None:
|
||||
pass
|
||||
|
||||
@ -95,6 +107,12 @@ class CompositeHook(AgentHook):
|
||||
async def before_execute_tools(self, context: AgentHookContext) -> None:
|
||||
await self._for_each_hook_safe("before_execute_tools", context)
|
||||
|
||||
async def emit_reasoning(self, reasoning_content: str | None) -> None:
|
||||
await self._for_each_hook_safe("emit_reasoning", reasoning_content)
|
||||
|
||||
async def emit_reasoning_end(self) -> None:
|
||||
await self._for_each_hook_safe("emit_reasoning_end")
|
||||
|
||||
async def after_iteration(self, context: AgentHookContext) -> None:
|
||||
await self._for_each_hook_safe("after_iteration", context)
|
||||
|
||||
|
||||
@ -4,7 +4,6 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import dataclasses
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from contextlib import AsyncExitStack, nullcontext, suppress
|
||||
@ -15,11 +14,12 @@ from typing import TYPE_CHECKING, Any, Awaitable, Callable
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from nanobot.agent import model_presets as preset_helpers
|
||||
from nanobot.agent.autocompact import AutoCompact
|
||||
from nanobot.agent.context import ContextBuilder
|
||||
from nanobot.agent.hook import AgentHook, AgentHookContext, CompositeHook
|
||||
from nanobot.agent.hook import AgentHook, CompositeHook
|
||||
from nanobot.agent.memory import Consolidator, Dream
|
||||
from nanobot.agent import model_presets as preset_helpers
|
||||
from nanobot.agent.progress_hook import AgentProgressHook
|
||||
from nanobot.agent.runner import _MAX_INJECTIONS_PER_TURN, AgentRunner, AgentRunSpec
|
||||
from nanobot.agent.subagent import SubagentManager
|
||||
from nanobot.agent.tools.file_state import FileStateStore, bind_file_states, reset_file_states
|
||||
@ -38,12 +38,6 @@ from nanobot.utils.document import extract_documents
|
||||
from nanobot.utils.helpers import image_placeholder_text
|
||||
from nanobot.utils.helpers import truncate_text as truncate_text_fn
|
||||
from nanobot.utils.image_generation_intent import image_generation_prompt
|
||||
from nanobot.utils.progress_events import (
|
||||
build_tool_event_finish_payloads,
|
||||
build_tool_event_start_payload,
|
||||
invoke_on_progress,
|
||||
on_progress_accepts_tool_events,
|
||||
)
|
||||
from nanobot.utils.runtime import EMPTY_FINAL_RESPONSE_MESSAGE
|
||||
from nanobot.utils.webui_titles import mark_webui_session, maybe_generate_webui_title_after_turn
|
||||
|
||||
@ -59,114 +53,6 @@ if TYPE_CHECKING:
|
||||
UNIFIED_SESSION_KEY = "unified:default"
|
||||
|
||||
|
||||
class _LoopHook(AgentHook):
|
||||
"""Core hook for the main loop."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
agent_loop: AgentLoop,
|
||||
on_progress: Callable[..., Awaitable[None]] | None = None,
|
||||
on_stream: Callable[[str], Awaitable[None]] | None = None,
|
||||
on_stream_end: Callable[..., Awaitable[None]] | None = None,
|
||||
*,
|
||||
channel: str = "cli",
|
||||
chat_id: str = "direct",
|
||||
message_id: str | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
session_key: str | None = None,
|
||||
) -> None:
|
||||
super().__init__(reraise=True)
|
||||
self._loop = agent_loop
|
||||
self._on_progress = on_progress
|
||||
self._on_stream = on_stream
|
||||
self._on_stream_end = on_stream_end
|
||||
self._channel = channel
|
||||
self._chat_id = chat_id
|
||||
self._message_id = message_id
|
||||
self._metadata = metadata or {}
|
||||
self._session_key = session_key
|
||||
self._stream_buf = ""
|
||||
|
||||
def wants_streaming(self) -> bool:
|
||||
return self._on_stream is not None
|
||||
|
||||
async def on_stream(self, context: AgentHookContext, delta: str) -> None:
|
||||
from nanobot.utils.helpers import strip_think
|
||||
|
||||
prev_clean = strip_think(self._stream_buf)
|
||||
self._stream_buf += delta
|
||||
new_clean = strip_think(self._stream_buf)
|
||||
incremental = new_clean[len(prev_clean) :]
|
||||
if incremental and self._on_stream:
|
||||
await self._on_stream(incremental)
|
||||
|
||||
async def on_stream_end(self, context: AgentHookContext, *, resuming: bool) -> None:
|
||||
if self._on_stream_end:
|
||||
await self._on_stream_end(resuming=resuming)
|
||||
self._stream_buf = ""
|
||||
|
||||
async def before_iteration(self, context: AgentHookContext) -> None:
|
||||
self._loop._current_iteration = context.iteration
|
||||
logger.debug(
|
||||
"Starting agent loop iteration {} for session {}",
|
||||
context.iteration,
|
||||
self._session_key,
|
||||
)
|
||||
|
||||
async def before_execute_tools(self, context: AgentHookContext) -> None:
|
||||
if self._on_progress:
|
||||
if not self._on_stream and not context.streamed_content:
|
||||
thought = self._loop._strip_think(
|
||||
context.response.content if context.response else None
|
||||
)
|
||||
if thought:
|
||||
await self._on_progress(thought)
|
||||
tool_hint = self._loop._strip_think(self._loop._tool_hint(context.tool_calls))
|
||||
tool_events = [build_tool_event_start_payload(tc) for tc in context.tool_calls]
|
||||
await invoke_on_progress(
|
||||
self._on_progress,
|
||||
tool_hint,
|
||||
tool_hint=True,
|
||||
tool_events=tool_events,
|
||||
)
|
||||
for tc in context.tool_calls:
|
||||
args_str = json.dumps(tc.arguments, ensure_ascii=False)
|
||||
logger.info("Tool call: {}({})", tc.name, args_str[:200])
|
||||
self._loop._set_tool_context(
|
||||
self._channel,
|
||||
self._chat_id,
|
||||
self._message_id,
|
||||
self._metadata,
|
||||
session_key=self._session_key,
|
||||
)
|
||||
|
||||
async def after_iteration(self, context: AgentHookContext) -> None:
|
||||
if (
|
||||
self._on_progress
|
||||
and context.tool_calls
|
||||
and context.tool_events
|
||||
and on_progress_accepts_tool_events(self._on_progress)
|
||||
):
|
||||
tool_events = build_tool_event_finish_payloads(context)
|
||||
if tool_events:
|
||||
await invoke_on_progress(
|
||||
self._on_progress,
|
||||
"",
|
||||
tool_hint=False,
|
||||
tool_events=tool_events,
|
||||
)
|
||||
u = context.usage or {}
|
||||
logger.debug(
|
||||
"LLM usage: prompt={} completion={} cached={}",
|
||||
u.get("prompt_tokens", 0),
|
||||
u.get("completion_tokens", 0),
|
||||
u.get("cached_tokens", 0),
|
||||
)
|
||||
|
||||
def finalize_content(self, context: AgentHookContext, content: str | None) -> str | None:
|
||||
return self._loop._strip_think(content)
|
||||
|
||||
|
||||
class TurnState(Enum):
|
||||
RESTORE = auto()
|
||||
COMPACT = auto()
|
||||
@ -617,26 +503,11 @@ class AgentLoop:
|
||||
if tool and isinstance(tool, ContextAware):
|
||||
tool.set_context(request_ctx)
|
||||
|
||||
@staticmethod
|
||||
def _strip_think(text: str | None) -> str | None:
|
||||
"""Remove <think>…</think> blocks that some models embed in content."""
|
||||
if not text:
|
||||
return None
|
||||
from nanobot.utils.helpers import strip_think
|
||||
|
||||
return strip_think(text) or None
|
||||
|
||||
@staticmethod
|
||||
def _runtime_chat_id(msg: InboundMessage) -> str:
|
||||
"""Return the chat id shown in runtime metadata for the model."""
|
||||
return str(msg.metadata.get("context_chat_id") or msg.chat_id)
|
||||
|
||||
def _tool_hint(self, tool_calls: list) -> str:
|
||||
"""Format tool calls as concise hints with smart abbreviation."""
|
||||
from nanobot.utils.tool_hints import format_tool_hints
|
||||
|
||||
return format_tool_hints(tool_calls, max_length=self.tool_hint_max_length)
|
||||
|
||||
async def _build_bus_progress_callback(
|
||||
self, msg: InboundMessage
|
||||
) -> Callable[..., Awaitable[None]]:
|
||||
@ -647,10 +518,16 @@ class AgentLoop:
|
||||
*,
|
||||
tool_hint: bool = False,
|
||||
tool_events: list[dict[str, Any]] | None = None,
|
||||
reasoning: bool = False,
|
||||
reasoning_end: bool = False,
|
||||
) -> None:
|
||||
meta = dict(msg.metadata or {})
|
||||
meta["_progress"] = True
|
||||
meta["_tool_hint"] = tool_hint
|
||||
if reasoning:
|
||||
meta["_reasoning_delta"] = True
|
||||
if reasoning_end:
|
||||
meta["_reasoning_end"] = True
|
||||
if tool_events:
|
||||
meta["_tool_events"] = tool_events
|
||||
await self.bus.publish_outbound(
|
||||
@ -794,8 +671,7 @@ class AgentLoop:
|
||||
"""
|
||||
self._sync_subagent_runtime_limits()
|
||||
|
||||
loop_hook = _LoopHook(
|
||||
self,
|
||||
loop_hook = AgentProgressHook(
|
||||
on_progress=on_progress,
|
||||
on_stream=on_stream,
|
||||
on_stream_end=on_stream_end,
|
||||
@ -804,6 +680,9 @@ class AgentLoop:
|
||||
message_id=message_id,
|
||||
metadata=metadata,
|
||||
session_key=session_key,
|
||||
tool_hint_max_length=self.tool_hint_max_length,
|
||||
set_tool_context=self._set_tool_context,
|
||||
on_iteration=lambda iteration: setattr(self, "_current_iteration", iteration),
|
||||
)
|
||||
hook: AgentHook = (
|
||||
CompositeHook([loop_hook] + self._extra_hooks) if self._extra_hooks else loop_hook
|
||||
|
||||
178
nanobot/agent/progress_hook.py
Normal file
178
nanobot/agent/progress_hook.py
Normal file
@ -0,0 +1,178 @@
|
||||
"""Agent hook that adapts runner events into channel progress UI."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import inspect
|
||||
import json
|
||||
from typing import Any, Awaitable, Callable
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from nanobot.agent.hook import AgentHook, AgentHookContext
|
||||
from nanobot.utils.helpers import IncrementalThinkExtractor, strip_think
|
||||
from nanobot.utils.progress_events import (
|
||||
build_tool_event_finish_payloads,
|
||||
build_tool_event_start_payload,
|
||||
invoke_on_progress,
|
||||
on_progress_accepts_tool_events,
|
||||
)
|
||||
from nanobot.utils.tool_hints import format_tool_hints
|
||||
|
||||
|
||||
class AgentProgressHook(AgentHook):
|
||||
"""Translate runner lifecycle events into user-visible progress signals."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
on_progress: Callable[..., Awaitable[None]] | None = None,
|
||||
on_stream: Callable[[str], Awaitable[None]] | None = None,
|
||||
on_stream_end: Callable[..., Awaitable[None]] | None = None,
|
||||
*,
|
||||
channel: str = "cli",
|
||||
chat_id: str = "direct",
|
||||
message_id: str | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
session_key: str | None = None,
|
||||
tool_hint_max_length: int = 40,
|
||||
set_tool_context: Callable[..., None] | None = None,
|
||||
on_iteration: Callable[[int], None] | None = None,
|
||||
) -> None:
|
||||
super().__init__(reraise=True)
|
||||
self._on_progress = on_progress
|
||||
self._on_stream = on_stream
|
||||
self._on_stream_end = on_stream_end
|
||||
self._channel = channel
|
||||
self._chat_id = chat_id
|
||||
self._message_id = message_id
|
||||
self._metadata = metadata or {}
|
||||
self._session_key = session_key
|
||||
self._tool_hint_max_length = tool_hint_max_length
|
||||
self._set_tool_context = set_tool_context
|
||||
self._on_iteration = on_iteration
|
||||
self._stream_buf = ""
|
||||
self._think_extractor = IncrementalThinkExtractor()
|
||||
self._reasoning_open = False
|
||||
|
||||
def wants_streaming(self) -> bool:
|
||||
return self._on_stream is not None
|
||||
|
||||
@staticmethod
|
||||
def _strip_think(text: str | None) -> str | None:
|
||||
if not text:
|
||||
return None
|
||||
return strip_think(text) or None
|
||||
|
||||
def _tool_hint(self, tool_calls: list[Any]) -> str:
|
||||
return format_tool_hints(tool_calls, max_length=self._tool_hint_max_length)
|
||||
|
||||
@staticmethod
|
||||
def _on_progress_accepts(cb: Callable[..., Any], name: str) -> bool:
|
||||
try:
|
||||
sig = inspect.signature(cb)
|
||||
except (TypeError, ValueError):
|
||||
return False
|
||||
if any(p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values()):
|
||||
return True
|
||||
return name in sig.parameters
|
||||
|
||||
async def on_stream(self, context: AgentHookContext, delta: str) -> None:
|
||||
prev_clean = strip_think(self._stream_buf)
|
||||
self._stream_buf += delta
|
||||
new_clean = strip_think(self._stream_buf)
|
||||
incremental = new_clean[len(prev_clean) :]
|
||||
|
||||
if await self._think_extractor.feed(self._stream_buf, self.emit_reasoning):
|
||||
context.streamed_reasoning = True
|
||||
|
||||
if incremental:
|
||||
# Answer text has started; close the reasoning segment so the UI can
|
||||
# lock the bubble before the answer renders below it.
|
||||
await self.emit_reasoning_end()
|
||||
if self._on_stream:
|
||||
await self._on_stream(incremental)
|
||||
|
||||
async def on_stream_end(self, context: AgentHookContext, *, resuming: bool) -> None:
|
||||
await self.emit_reasoning_end()
|
||||
if self._on_stream_end:
|
||||
await self._on_stream_end(resuming=resuming)
|
||||
self._stream_buf = ""
|
||||
self._think_extractor.reset()
|
||||
|
||||
async def before_iteration(self, context: AgentHookContext) -> None:
|
||||
if self._on_iteration:
|
||||
self._on_iteration(context.iteration)
|
||||
logger.debug(
|
||||
"Starting agent loop iteration {} for session {}",
|
||||
context.iteration,
|
||||
self._session_key,
|
||||
)
|
||||
|
||||
async def before_execute_tools(self, context: AgentHookContext) -> None:
|
||||
if self._on_progress:
|
||||
if not self._on_stream and not context.streamed_content:
|
||||
thought = self._strip_think(context.response.content if context.response else None)
|
||||
if thought:
|
||||
await self._on_progress(thought)
|
||||
tool_hint = self._strip_think(self._tool_hint(context.tool_calls))
|
||||
tool_events = [build_tool_event_start_payload(tc) for tc in context.tool_calls]
|
||||
await invoke_on_progress(
|
||||
self._on_progress,
|
||||
tool_hint,
|
||||
tool_hint=True,
|
||||
tool_events=tool_events,
|
||||
)
|
||||
for tc in context.tool_calls:
|
||||
args_str = json.dumps(tc.arguments, ensure_ascii=False)
|
||||
logger.info("Tool call: {}({})", tc.name, args_str[:200])
|
||||
if self._set_tool_context:
|
||||
self._set_tool_context(
|
||||
self._channel,
|
||||
self._chat_id,
|
||||
self._message_id,
|
||||
self._metadata,
|
||||
session_key=self._session_key,
|
||||
)
|
||||
|
||||
async def emit_reasoning(self, reasoning_content: str | None) -> None:
|
||||
"""Publish a reasoning chunk; channel plugins decide whether to render."""
|
||||
if (
|
||||
self._on_progress
|
||||
and reasoning_content
|
||||
and self._on_progress_accepts(self._on_progress, "reasoning")
|
||||
):
|
||||
self._reasoning_open = True
|
||||
await self._on_progress(reasoning_content, reasoning=True)
|
||||
|
||||
async def emit_reasoning_end(self) -> None:
|
||||
"""Close the current reasoning stream segment, if any was open."""
|
||||
if self._reasoning_open and self._on_progress:
|
||||
self._reasoning_open = False
|
||||
await self._on_progress("", reasoning_end=True)
|
||||
else:
|
||||
self._reasoning_open = False
|
||||
|
||||
async def after_iteration(self, context: AgentHookContext) -> None:
|
||||
if (
|
||||
self._on_progress
|
||||
and context.tool_calls
|
||||
and context.tool_events
|
||||
and on_progress_accepts_tool_events(self._on_progress)
|
||||
):
|
||||
tool_events = build_tool_event_finish_payloads(context)
|
||||
if tool_events:
|
||||
await invoke_on_progress(
|
||||
self._on_progress,
|
||||
"",
|
||||
tool_hint=False,
|
||||
tool_events=tool_events,
|
||||
)
|
||||
u = context.usage or {}
|
||||
logger.debug(
|
||||
"LLM usage: prompt={} completion={} cached={}",
|
||||
u.get("prompt_tokens", 0),
|
||||
u.get("completion_tokens", 0),
|
||||
u.get("cached_tokens", 0),
|
||||
)
|
||||
|
||||
def finalize_content(self, context: AgentHookContext, content: str | None) -> str | None:
|
||||
return self._strip_think(content)
|
||||
@ -16,9 +16,11 @@ from nanobot.agent.hook import AgentHook, AgentHookContext
|
||||
from nanobot.agent.tools.registry import ToolRegistry
|
||||
from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
|
||||
from nanobot.utils.helpers import (
|
||||
IncrementalThinkExtractor,
|
||||
build_assistant_message,
|
||||
estimate_message_tokens,
|
||||
estimate_prompt_tokens_chain,
|
||||
extract_reasoning,
|
||||
find_legal_message_start,
|
||||
maybe_persist_tool_result,
|
||||
strip_think,
|
||||
@ -281,6 +283,17 @@ class AgentRunner:
|
||||
context.tool_calls = list(response.tool_calls)
|
||||
self._accumulate_usage(usage, raw_usage)
|
||||
|
||||
reasoning_text, cleaned_content = extract_reasoning(
|
||||
response.reasoning_content,
|
||||
response.thinking_blocks,
|
||||
response.content,
|
||||
)
|
||||
response.content = cleaned_content
|
||||
if reasoning_text and not context.streamed_reasoning:
|
||||
await hook.emit_reasoning(reasoning_text)
|
||||
await hook.emit_reasoning_end()
|
||||
context.streamed_reasoning = True
|
||||
|
||||
if response.should_execute_tools:
|
||||
context.tool_calls = list(response.tool_calls)
|
||||
if hook.wants_streaming():
|
||||
@ -605,6 +618,8 @@ class AgentRunner:
|
||||
and getattr(self.provider, "supports_progress_deltas", False) is True
|
||||
)
|
||||
|
||||
progress_state: dict[str, bool] | None = None
|
||||
|
||||
if wants_streaming:
|
||||
async def _stream(delta: str) -> None:
|
||||
if delta:
|
||||
@ -617,6 +632,8 @@ class AgentRunner:
|
||||
)
|
||||
elif wants_progress_streaming:
|
||||
stream_buf = ""
|
||||
think_extractor = IncrementalThinkExtractor()
|
||||
progress_state = {"reasoning_open": False}
|
||||
|
||||
async def _stream_progress(delta: str) -> None:
|
||||
nonlocal stream_buf
|
||||
@ -626,7 +643,15 @@ class AgentRunner:
|
||||
stream_buf += delta
|
||||
new_clean = strip_think(stream_buf)
|
||||
incremental = new_clean[len(prev_clean):]
|
||||
|
||||
if await think_extractor.feed(stream_buf, hook.emit_reasoning):
|
||||
context.streamed_reasoning = True
|
||||
progress_state["reasoning_open"] = True
|
||||
|
||||
if incremental:
|
||||
if progress_state["reasoning_open"]:
|
||||
await hook.emit_reasoning_end()
|
||||
progress_state["reasoning_open"] = False
|
||||
context.streamed_content = True
|
||||
await spec.progress_callback(incremental)
|
||||
|
||||
@ -637,16 +662,20 @@ class AgentRunner:
|
||||
else:
|
||||
coro = self.provider.chat_with_retry(**kwargs)
|
||||
|
||||
if timeout_s is None:
|
||||
return await coro
|
||||
try:
|
||||
return await asyncio.wait_for(coro, timeout=timeout_s)
|
||||
response = (
|
||||
await coro if timeout_s is None
|
||||
else await asyncio.wait_for(coro, timeout=timeout_s)
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
return LLMResponse(
|
||||
content=f"Error calling LLM: timed out after {timeout_s:g}s",
|
||||
finish_reason="error",
|
||||
error_kind="timeout",
|
||||
)
|
||||
if progress_state and progress_state.get("reasoning_open"):
|
||||
await hook.emit_reasoning_end()
|
||||
return response
|
||||
|
||||
async def _request_finalization_retry(
|
||||
self,
|
||||
|
||||
@ -28,6 +28,7 @@ class BaseChannel(ABC):
|
||||
transcription_language: str | None = None
|
||||
send_progress: bool = True
|
||||
send_tool_hints: bool = False
|
||||
show_reasoning: bool = True
|
||||
|
||||
def __init__(self, config: Any, bus: MessageBus):
|
||||
"""
|
||||
@ -120,6 +121,53 @@ class BaseChannel(ABC):
|
||||
"""
|
||||
pass
|
||||
|
||||
async def send_reasoning_delta(
|
||||
self, chat_id: str, delta: str, metadata: dict[str, Any] | None = None
|
||||
) -> None:
|
||||
"""Stream a chunk of model reasoning/thinking content.
|
||||
|
||||
Default is no-op. Channels with a native low-emphasis primitive
|
||||
(Slack context block, Telegram expandable blockquote, Discord
|
||||
subtext, WebUI italic bubble, ...) override to render reasoning
|
||||
as a subordinate trace that updates in place as the model thinks.
|
||||
|
||||
Streaming contract mirrors :meth:`send_delta`: ``_reasoning_delta``
|
||||
is a chunk, ``_reasoning_end`` ends the current reasoning segment,
|
||||
and stateful implementations should key buffers by ``_stream_id``
|
||||
rather than only by ``chat_id``.
|
||||
"""
|
||||
return
|
||||
|
||||
async def send_reasoning_end(
|
||||
self, chat_id: str, metadata: dict[str, Any] | None = None
|
||||
) -> None:
|
||||
"""Mark the end of a reasoning stream segment.
|
||||
|
||||
Default is no-op. Channels that buffer ``send_reasoning_delta``
|
||||
chunks for in-place updates use this signal to flush and freeze
|
||||
the rendered group; one-shot channels can ignore it entirely.
|
||||
"""
|
||||
return
|
||||
|
||||
async def send_reasoning(self, msg: OutboundMessage) -> None:
|
||||
"""Deliver a complete reasoning block.
|
||||
|
||||
Default implementation reuses the streaming pair so plugins only
|
||||
need to override the delta/end methods. Equivalent to one delta
|
||||
with the full content followed immediately by an end marker —
|
||||
keeps a single rendering path for both streamed and one-shot
|
||||
reasoning (e.g. DeepSeek-R1's final-response ``reasoning_content``).
|
||||
"""
|
||||
if not msg.content:
|
||||
return
|
||||
meta = dict(msg.metadata or {})
|
||||
meta.setdefault("_reasoning_delta", True)
|
||||
await self.send_reasoning_delta(msg.chat_id, msg.content, meta)
|
||||
end_meta = dict(meta)
|
||||
end_meta.pop("_reasoning_delta", None)
|
||||
end_meta["_reasoning_end"] = True
|
||||
await self.send_reasoning_end(msg.chat_id, end_meta)
|
||||
|
||||
@property
|
||||
def supports_streaming(self) -> bool:
|
||||
"""True when config enables streaming AND this subclass implements send_delta."""
|
||||
|
||||
@ -36,6 +36,7 @@ _SEND_RETRY_DELAYS = (1, 2, 4)
|
||||
_BOOL_CAMEL_ALIASES: dict[str, str] = {
|
||||
"send_progress": "sendProgress",
|
||||
"send_tool_hints": "sendToolHints",
|
||||
"show_reasoning": "showReasoning",
|
||||
}
|
||||
|
||||
class ChannelManager:
|
||||
@ -104,6 +105,9 @@ class ChannelManager:
|
||||
channel.send_tool_hints = self._resolve_bool_override(
|
||||
section, "send_tool_hints", self.config.channels.send_tool_hints,
|
||||
)
|
||||
channel.show_reasoning = self._resolve_bool_override(
|
||||
section, "show_reasoning", self.config.channels.show_reasoning,
|
||||
)
|
||||
self.channels[name] = channel
|
||||
logger.info("{} channel enabled", cls.display_name)
|
||||
except Exception as e:
|
||||
@ -279,6 +283,23 @@ class ChannelManager:
|
||||
timeout=1.0
|
||||
)
|
||||
|
||||
if (
|
||||
msg.metadata.get("_reasoning_delta")
|
||||
or msg.metadata.get("_reasoning_end")
|
||||
or msg.metadata.get("_reasoning")
|
||||
):
|
||||
# Reasoning rides its own plugin channel: only delivered
|
||||
# when the destination channel opts in via ``show_reasoning``
|
||||
# and overrides the streaming primitives. Channels without
|
||||
# a low-emphasis UI affordance keep the base no-op and the
|
||||
# content silently drops here. ``_reasoning`` (one-shot)
|
||||
# is accepted for backward compatibility with hooks that
|
||||
# haven't migrated to delta/end yet.
|
||||
channel = self.channels.get(msg.channel)
|
||||
if channel is not None and channel.show_reasoning:
|
||||
await self._send_with_retry(channel, msg)
|
||||
continue
|
||||
|
||||
if msg.metadata.get("_progress"):
|
||||
if msg.metadata.get("_tool_hint") and not self._should_send_progress(
|
||||
msg.channel, tool_hint=True,
|
||||
@ -329,7 +350,16 @@ class ChannelManager:
|
||||
@staticmethod
|
||||
async def _send_once(channel: BaseChannel, msg: OutboundMessage) -> None:
|
||||
"""Send one outbound message without retry policy."""
|
||||
if msg.metadata.get("_stream_delta") or msg.metadata.get("_stream_end"):
|
||||
if msg.metadata.get("_reasoning_end"):
|
||||
await channel.send_reasoning_end(msg.chat_id, msg.metadata)
|
||||
elif msg.metadata.get("_reasoning_delta"):
|
||||
await channel.send_reasoning_delta(msg.chat_id, msg.content, msg.metadata)
|
||||
elif msg.metadata.get("_reasoning"):
|
||||
# Back-compat: one-shot reasoning. BaseChannel translates this
|
||||
# to a single delta + end pair so plugins only implement the
|
||||
# streaming primitives.
|
||||
await channel.send_reasoning(msg)
|
||||
elif msg.metadata.get("_stream_delta") or msg.metadata.get("_stream_end"):
|
||||
await channel.send_delta(msg.chat_id, msg.content, msg.metadata)
|
||||
elif not msg.metadata.get("_streamed"):
|
||||
await channel.send(msg)
|
||||
|
||||
@ -1487,6 +1487,54 @@ class WebSocketChannel(BaseChannel):
|
||||
for connection in conns:
|
||||
await self._safe_send_to(connection, raw, label=" ")
|
||||
|
||||
async def send_reasoning_delta(
|
||||
self,
|
||||
chat_id: str,
|
||||
delta: str,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
) -> None:
|
||||
"""Push one chunk of model reasoning. Mirrors ``send_delta`` shape so
|
||||
WebUI receives a stream that opens, updates in place, and closes —
|
||||
rendered above the active assistant bubble with a shimmer header
|
||||
until the matching ``reasoning_end`` arrives.
|
||||
"""
|
||||
conns = list(self._subs.get(chat_id, ()))
|
||||
if not conns or not delta:
|
||||
return
|
||||
meta = metadata or {}
|
||||
body: dict[str, Any] = {
|
||||
"event": "reasoning_delta",
|
||||
"chat_id": chat_id,
|
||||
"text": delta,
|
||||
}
|
||||
stream_id = meta.get("_stream_id")
|
||||
if stream_id is not None:
|
||||
body["stream_id"] = stream_id
|
||||
raw = json.dumps(body, ensure_ascii=False)
|
||||
for connection in conns:
|
||||
await self._safe_send_to(connection, raw, label=" reasoning ")
|
||||
|
||||
async def send_reasoning_end(
|
||||
self,
|
||||
chat_id: str,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
) -> None:
|
||||
"""Close the current reasoning stream segment for in-place renderers."""
|
||||
conns = list(self._subs.get(chat_id, ()))
|
||||
if not conns:
|
||||
return
|
||||
meta = metadata or {}
|
||||
body: dict[str, Any] = {
|
||||
"event": "reasoning_end",
|
||||
"chat_id": chat_id,
|
||||
}
|
||||
stream_id = meta.get("_stream_id")
|
||||
if stream_id is not None:
|
||||
body["stream_id"] = stream_id
|
||||
raw = json.dumps(body, ensure_ascii=False)
|
||||
for connection in conns:
|
||||
await self._safe_send_to(connection, raw, label=" reasoning_end ")
|
||||
|
||||
async def send_delta(
|
||||
self,
|
||||
chat_id: str,
|
||||
|
||||
@ -176,13 +176,15 @@ def _print_agent_response(
|
||||
response: str,
|
||||
render_markdown: bool,
|
||||
metadata: dict | None = None,
|
||||
show_header: bool = True,
|
||||
) -> None:
|
||||
"""Render assistant response with consistent terminal styling."""
|
||||
console = _make_console()
|
||||
content = response or ""
|
||||
body = _response_renderable(content, render_markdown, metadata)
|
||||
console.print()
|
||||
console.print(f"[cyan]{__logo__} nanobot[/cyan]")
|
||||
if show_header:
|
||||
console.print()
|
||||
console.print(f"[cyan]{__logo__} nanobot[/cyan]")
|
||||
console.print(body)
|
||||
console.print()
|
||||
|
||||
@ -228,42 +230,70 @@ async def _print_interactive_response(
|
||||
await run_in_terminal(_write)
|
||||
|
||||
|
||||
def _print_cli_progress_line(text: str, thinking: ThinkingSpinner | None) -> None:
|
||||
def _print_cli_progress_line(text: str, thinking: ThinkingSpinner | None, renderer: StreamRenderer | None = None) -> None:
|
||||
"""Print a CLI progress line, pausing the spinner if needed."""
|
||||
if not text.strip():
|
||||
return
|
||||
with thinking.pause() if thinking else nullcontext():
|
||||
console.print(f" [dim]↳ {text}[/dim]")
|
||||
target = renderer.console if renderer else console
|
||||
pause = renderer.pause_spinner() if renderer else (thinking.pause() if thinking else nullcontext())
|
||||
with pause:
|
||||
if renderer:
|
||||
renderer.ensure_header()
|
||||
target.print(f" [dim]↳ {text}[/dim]")
|
||||
|
||||
|
||||
async def _print_interactive_progress_line(text: str, renderer: StreamRenderer | None) -> None:
|
||||
"""Print an interactive progress line, pausing the renderer's spinner if needed."""
|
||||
def _print_cli_reasoning(text: str, thinking: ThinkingSpinner | None, renderer: StreamRenderer | None = None) -> None:
|
||||
"""Print reasoning/thinking content in a distinct style."""
|
||||
if not text.strip():
|
||||
return
|
||||
with renderer.pause() if renderer else nullcontext():
|
||||
await _print_interactive_line(text)
|
||||
target = renderer.console if renderer else console
|
||||
pause = renderer.pause_spinner() if renderer else (thinking.pause() if thinking else nullcontext())
|
||||
with pause:
|
||||
if renderer:
|
||||
renderer.ensure_header()
|
||||
target.print(f"[dim italic]✻ {text}[/dim italic]")
|
||||
|
||||
|
||||
async def _print_interactive_progress_line(text: str, thinking: ThinkingSpinner | None, renderer: StreamRenderer | None = None) -> None:
|
||||
"""Print an interactive progress line, pausing the spinner if needed."""
|
||||
if not text.strip():
|
||||
return
|
||||
if renderer:
|
||||
with renderer.pause_spinner():
|
||||
renderer.ensure_header()
|
||||
renderer.console.print(f" [dim]↳ {text}[/dim]")
|
||||
else:
|
||||
with thinking.pause() if thinking else nullcontext():
|
||||
await _print_interactive_line(text)
|
||||
|
||||
|
||||
async def _maybe_print_interactive_progress(
|
||||
msg: Any,
|
||||
renderer: StreamRenderer | None,
|
||||
thinking: ThinkingSpinner | None,
|
||||
channels_config: Any,
|
||||
renderer: StreamRenderer | None = None,
|
||||
) -> bool:
|
||||
metadata = msg.metadata or {}
|
||||
if metadata.get("_retry_wait"):
|
||||
await _print_interactive_progress_line(msg.content, renderer)
|
||||
await _print_interactive_progress_line(msg.content, thinking, renderer)
|
||||
return True
|
||||
|
||||
if not metadata.get("_progress"):
|
||||
return False
|
||||
|
||||
is_tool_hint = metadata.get("_tool_hint", False)
|
||||
is_reasoning = metadata.get("_reasoning", False) or metadata.get("_reasoning_delta", False)
|
||||
if is_reasoning:
|
||||
if channels_config and not channels_config.show_reasoning:
|
||||
return True
|
||||
_print_cli_reasoning(msg.content, thinking, renderer)
|
||||
return True
|
||||
if channels_config and is_tool_hint and not channels_config.send_tool_hints:
|
||||
return True
|
||||
if channels_config and not is_tool_hint and not channels_config.send_progress:
|
||||
return True
|
||||
|
||||
await _print_interactive_progress_line(msg.content, renderer)
|
||||
await _print_interactive_progress_line(msg.content, thinking, renderer)
|
||||
return True
|
||||
|
||||
|
||||
@ -1064,13 +1094,20 @@ def agent(
|
||||
# Shared reference for progress callbacks
|
||||
_thinking: ThinkingSpinner | None = None
|
||||
|
||||
async def _cli_progress(content: str, *, tool_hint: bool = False, **_kwargs: Any) -> None:
|
||||
ch = agent_loop.channels_config
|
||||
if ch and tool_hint and not ch.send_tool_hints:
|
||||
return
|
||||
if ch and not tool_hint and not ch.send_progress:
|
||||
return
|
||||
_print_cli_progress_line(content, _thinking)
|
||||
def _make_progress(renderer: StreamRenderer | None = None):
|
||||
async def _cli_progress(content: str, *, tool_hint: bool = False, reasoning: bool = False, **_kwargs: Any) -> None:
|
||||
ch = agent_loop.channels_config
|
||||
if reasoning:
|
||||
if ch and not ch.show_reasoning:
|
||||
return
|
||||
_print_cli_reasoning(content, _thinking, renderer)
|
||||
return
|
||||
if ch and tool_hint and not ch.send_tool_hints:
|
||||
return
|
||||
if ch and not tool_hint and not ch.send_progress:
|
||||
return
|
||||
_print_cli_progress_line(content, _thinking, renderer)
|
||||
return _cli_progress
|
||||
|
||||
if message:
|
||||
# Single message mode — direct call, no bus needed
|
||||
@ -1082,16 +1119,20 @@ def agent(
|
||||
)
|
||||
response = await agent_loop.process_direct(
|
||||
message, session_id,
|
||||
on_progress=_cli_progress,
|
||||
on_progress=_make_progress(renderer),
|
||||
on_stream=renderer.on_delta,
|
||||
on_stream_end=renderer.on_end,
|
||||
)
|
||||
if not renderer.streamed:
|
||||
await renderer.close()
|
||||
print_kwargs: dict[str, Any] = {}
|
||||
if renderer.header_printed:
|
||||
print_kwargs["show_header"] = False
|
||||
_print_agent_response(
|
||||
response.content if response else "",
|
||||
render_markdown=markdown,
|
||||
metadata=response.metadata if response else None,
|
||||
**print_kwargs,
|
||||
)
|
||||
await agent_loop.close_mcp()
|
||||
|
||||
@ -1154,6 +1195,7 @@ def agent(
|
||||
msg,
|
||||
renderer,
|
||||
agent_loop.channels_config,
|
||||
renderer,
|
||||
):
|
||||
continue
|
||||
|
||||
@ -1215,8 +1257,14 @@ def agent(
|
||||
if content and not meta.get("_streamed"):
|
||||
if renderer:
|
||||
await renderer.close()
|
||||
print_kwargs: dict[str, Any] = {}
|
||||
if renderer and renderer.header_printed:
|
||||
print_kwargs["show_header"] = False
|
||||
_print_agent_response(
|
||||
content, render_markdown=markdown, metadata=meta,
|
||||
content,
|
||||
render_markdown=markdown,
|
||||
metadata=meta,
|
||||
**print_kwargs,
|
||||
)
|
||||
elif renderer and not renderer.streamed:
|
||||
await renderer.close()
|
||||
|
||||
@ -1,13 +1,16 @@
|
||||
"""Streaming renderer for CLI output.
|
||||
|
||||
Uses Rich Live with auto_refresh=False for stable, flicker-free
|
||||
markdown rendering during streaming. Ellipsis mode handles overflow.
|
||||
Uses Rich Live with ``transient=True`` for in-place markdown updates during
|
||||
streaming. After the live display stops, a final clean render is printed
|
||||
so the content persists on screen. ``transient=True`` ensures the live
|
||||
area is erased before ``stop()`` returns, avoiding the duplication bug
|
||||
that plagued earlier approaches.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import time
|
||||
from contextlib import contextmanager, nullcontext
|
||||
|
||||
from rich.console import Console
|
||||
from rich.live import Live
|
||||
@ -15,6 +18,16 @@ from rich.markdown import Markdown
|
||||
from rich.text import Text
|
||||
|
||||
|
||||
def _clear_current_line(console: Console) -> None:
|
||||
"""Erase a transient status line before printing persistent output."""
|
||||
file = console.file
|
||||
isatty = getattr(file, "isatty", lambda: False)
|
||||
if not isatty():
|
||||
return
|
||||
file.write("\r\x1b[2K")
|
||||
file.flush()
|
||||
|
||||
|
||||
def _make_console() -> Console:
|
||||
"""Create a Console that emits plain text when stdout is not a TTY.
|
||||
|
||||
@ -34,6 +47,7 @@ class ThinkingSpinner:
|
||||
|
||||
def __init__(self, console: Console | None = None, bot_name: str = "nanobot"):
|
||||
c = console or _make_console()
|
||||
self._console = c
|
||||
self._spinner = c.status(f"[dim]{bot_name} is thinking...[/dim]", spinner="dots")
|
||||
self._active = False
|
||||
|
||||
@ -45,6 +59,7 @@ class ThinkingSpinner:
|
||||
def __exit__(self, *exc):
|
||||
self._active = False
|
||||
self._spinner.stop()
|
||||
_clear_current_line(self._console)
|
||||
return False
|
||||
|
||||
def pause(self):
|
||||
@ -55,6 +70,7 @@ class ThinkingSpinner:
|
||||
def _ctx():
|
||||
if self._spinner and self._active:
|
||||
self._spinner.stop()
|
||||
_clear_current_line(self._console)
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
@ -65,13 +81,14 @@ class ThinkingSpinner:
|
||||
|
||||
|
||||
class StreamRenderer:
|
||||
"""Rich Live streaming with markdown. auto_refresh=False avoids render races.
|
||||
"""Streaming renderer with Rich Live for in-place updates.
|
||||
|
||||
Deltas arrive pre-filtered (no <think> tags) from the agent loop.
|
||||
During streaming: updates content in-place via Rich Live.
|
||||
On end: stops Live (transient=True erases it), then prints final render.
|
||||
|
||||
Flow per round:
|
||||
spinner -> first visible delta -> header + Live renders ->
|
||||
on_end -> Live stops (content stays on screen)
|
||||
spinner -> first delta -> header + Live updates ->
|
||||
on_end -> stop Live + final render
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@ -86,14 +103,24 @@ class StreamRenderer:
|
||||
self._bot_name = bot_name
|
||||
self._bot_icon = bot_icon
|
||||
self._buf = ""
|
||||
self._live: Live | None = None
|
||||
self._t = 0.0
|
||||
self.streamed = False
|
||||
self._console = _make_console()
|
||||
self._live: Live | None = None
|
||||
self._spinner: ThinkingSpinner | None = None
|
||||
self._header_printed = False
|
||||
self._start_spinner()
|
||||
|
||||
def _render(self):
|
||||
return Markdown(self._buf) if self._md and self._buf else Text(self._buf or "")
|
||||
def _renderable(self):
|
||||
"""Create a renderable from the current buffer."""
|
||||
if self._md and self._buf:
|
||||
return Markdown(self._buf)
|
||||
return Text(self._buf or "")
|
||||
|
||||
def _render_str(self) -> str:
|
||||
"""Render current buffer to a plain string via Rich."""
|
||||
with self._console.capture() as cap:
|
||||
self._console.print(self._renderable())
|
||||
return cap.get()
|
||||
|
||||
def _start_spinner(self) -> None:
|
||||
if self._show_spinner:
|
||||
@ -105,37 +132,85 @@ class StreamRenderer:
|
||||
self._spinner.__exit__(None, None, None)
|
||||
self._spinner = None
|
||||
|
||||
@property
|
||||
def console(self) -> Console:
|
||||
"""Expose the Live's console so external print functions can use it."""
|
||||
return self._console
|
||||
|
||||
@property
|
||||
def header_printed(self) -> bool:
|
||||
"""Whether this turn has already opened the assistant output block."""
|
||||
return self._header_printed
|
||||
|
||||
def ensure_header(self) -> None:
|
||||
"""Stop transient status and print the assistant header once."""
|
||||
# A turn can print trace rows before the final answer, then restart the
|
||||
# spinner while tools run. The next answer delta still needs to stop
|
||||
# that spinner even though the header was already printed.
|
||||
self._stop_spinner()
|
||||
if self._header_printed:
|
||||
return
|
||||
self._console.print()
|
||||
header = f"{self._bot_icon} {self._bot_name}" if self._bot_icon else self._bot_name
|
||||
self._console.print(f"[cyan]{header}[/cyan]")
|
||||
self._header_printed = True
|
||||
|
||||
def pause_spinner(self):
|
||||
"""Context manager: temporarily stop transient output for clean trace lines."""
|
||||
@contextmanager
|
||||
def _pause():
|
||||
live_was_active = self._live is not None
|
||||
if self._live:
|
||||
# Trace/reasoning can arrive after answer streaming has started.
|
||||
# Stop the transient Live view first so it does not leak a raw
|
||||
# partial markdown frame before the trace line.
|
||||
self._live.stop()
|
||||
self._live = None
|
||||
with self._spinner.pause() if self._spinner else nullcontext():
|
||||
yield
|
||||
# If more answer deltas arrive after the trace, on_delta() will
|
||||
# create a fresh Live using the existing buffer. If no deltas arrive,
|
||||
# on_end() prints the final buffered answer once.
|
||||
if live_was_active:
|
||||
return
|
||||
|
||||
return _pause()
|
||||
|
||||
async def on_delta(self, delta: str) -> None:
|
||||
self.streamed = True
|
||||
self._buf += delta
|
||||
if self._live is None:
|
||||
if not self._buf.strip():
|
||||
return
|
||||
self._stop_spinner()
|
||||
c = _make_console()
|
||||
c.print()
|
||||
header = f"{self._bot_icon} {self._bot_name}" if self._bot_icon else self._bot_name
|
||||
c.print(f"[cyan]{header}[/cyan]")
|
||||
self._live = Live(self._render(), console=c, auto_refresh=False)
|
||||
self.ensure_header()
|
||||
self._live = Live(
|
||||
self._renderable(),
|
||||
console=self._console,
|
||||
auto_refresh=False,
|
||||
transient=True,
|
||||
)
|
||||
self._live.start()
|
||||
now = time.monotonic()
|
||||
if (now - self._t) > 0.15:
|
||||
self._live.update(self._render())
|
||||
self._live.refresh()
|
||||
self._t = now
|
||||
else:
|
||||
self._live.update(self._renderable())
|
||||
self._live.refresh()
|
||||
|
||||
async def on_end(self, *, resuming: bool = False) -> None:
|
||||
if self._live:
|
||||
self._live.update(self._render())
|
||||
# Double-refresh to sync _shape before stop() calls refresh().
|
||||
self._live.refresh()
|
||||
self._live.update(self._renderable())
|
||||
self._live.refresh()
|
||||
self._live.stop()
|
||||
self._live = None
|
||||
self._stop_spinner()
|
||||
if self._buf.strip():
|
||||
# Print final rendered content (persists after Live is gone).
|
||||
out = sys.stdout
|
||||
out.write(self._render_str())
|
||||
out.flush()
|
||||
if resuming:
|
||||
self._buf = ""
|
||||
self._start_spinner()
|
||||
else:
|
||||
_make_console().print()
|
||||
|
||||
def stop_for_input(self) -> None:
|
||||
"""Stop spinner before user input to avoid prompt_toolkit conflicts."""
|
||||
@ -143,7 +218,6 @@ class StreamRenderer:
|
||||
|
||||
def pause(self):
|
||||
"""Context manager: pause spinner for external output. No-op once streaming has started."""
|
||||
from contextlib import nullcontext
|
||||
if self._spinner:
|
||||
return self._spinner.pause()
|
||||
return nullcontext()
|
||||
|
||||
@ -35,6 +35,7 @@ class ChannelsConfig(Base):
|
||||
|
||||
send_progress: bool = True # stream agent's text progress to the channel
|
||||
send_tool_hints: bool = False # stream tool-call hints (e.g. read_file("…"))
|
||||
show_reasoning: bool = True # surface model reasoning when channel implements it
|
||||
send_max_retries: int = Field(default=3, ge=0, le=10) # Max delivery attempts (initial send included)
|
||||
transcription_provider: str = "groq" # Voice transcription backend: "groq" or "openai"
|
||||
transcription_language: str | None = Field(default=None, pattern=r"^[a-z]{2,3}$") # Optional ISO-639-1 hint for audio transcription
|
||||
|
||||
@ -71,6 +71,93 @@ def strip_think(text: str) -> str:
|
||||
return text.strip()
|
||||
|
||||
|
||||
def extract_think(text: str) -> tuple[str | None, str]:
|
||||
"""Extract thinking content from inline ``<think>`` / ``<thought>`` blocks.
|
||||
|
||||
Returns ``(thinking_text, cleaned_text)``. Only closed blocks are
|
||||
extracted; unclosed streaming prefixes are stripped from the cleaned
|
||||
text but not surfaced — :func:`strip_think` handles that case.
|
||||
"""
|
||||
parts: list[str] = []
|
||||
for m in re.finditer(r"<think>([\s\S]*?)</think>", text):
|
||||
parts.append(m.group(1).strip())
|
||||
for m in re.finditer(r"<thought>([\s\S]*?)</thought>", text):
|
||||
parts.append(m.group(1).strip())
|
||||
thinking = "\n\n".join(parts) if parts else None
|
||||
return thinking, strip_think(text)
|
||||
|
||||
|
||||
class IncrementalThinkExtractor:
|
||||
"""Stateful inline ``<think>`` extractor for streaming buffers.
|
||||
|
||||
Streaming providers expose only a single content delta channel. When a
|
||||
model embeds reasoning in ``<think>...</think>`` blocks inside that
|
||||
channel, callers need to surface the reasoning incrementally as it
|
||||
arrives without re-emitting earlier text. This holds the "already
|
||||
emitted" cursor so the runner and the loop hook share one shape.
|
||||
"""
|
||||
|
||||
__slots__ = ("_emitted",)
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._emitted = ""
|
||||
|
||||
def reset(self) -> None:
|
||||
self._emitted = ""
|
||||
|
||||
async def feed(self, buf: str, emit: Any) -> bool:
|
||||
"""Emit any new thinking text found in ``buf``.
|
||||
|
||||
Returns True if anything was emitted this call. ``emit`` is an
|
||||
async callable taking a single string (typically
|
||||
``hook.emit_reasoning``).
|
||||
"""
|
||||
thinking, _ = extract_think(buf)
|
||||
if not thinking or thinking == self._emitted:
|
||||
return False
|
||||
new = thinking[len(self._emitted):].strip()
|
||||
self._emitted = thinking
|
||||
if not new:
|
||||
return False
|
||||
await emit(new)
|
||||
return True
|
||||
|
||||
|
||||
def extract_reasoning(
|
||||
reasoning_content: str | None,
|
||||
thinking_blocks: list[dict[str, Any]] | None,
|
||||
content: str | None,
|
||||
) -> tuple[str | None, str | None]:
|
||||
"""Return ``(reasoning_text, cleaned_content)`` from one model response.
|
||||
|
||||
Single source of truth for "what reasoning did this response carry, and
|
||||
what answer text remains after we peel it out". Fallback order:
|
||||
|
||||
1. Dedicated ``reasoning_content`` (DeepSeek-R1, Kimi, MiMo, OpenAI
|
||||
reasoning models, Bedrock).
|
||||
2. Anthropic ``thinking_blocks``.
|
||||
3. Inline ``<think>`` / ``<thought>`` blocks in ``content``.
|
||||
|
||||
Only one source contributes per response; lower-priority sources are
|
||||
ignored if a higher-priority one is present, but inline ``<think>``
|
||||
tags are still stripped from ``content`` so they never leak into the
|
||||
final answer.
|
||||
"""
|
||||
if reasoning_content:
|
||||
return reasoning_content, strip_think(content) if content else content
|
||||
if thinking_blocks:
|
||||
parts = [
|
||||
tb.get("thinking", "")
|
||||
for tb in thinking_blocks
|
||||
if isinstance(tb, dict) and tb.get("type") == "thinking"
|
||||
]
|
||||
joined = "\n\n".join(p for p in parts if p)
|
||||
return (joined or None), strip_think(content) if content else content
|
||||
if content:
|
||||
return extract_think(content)
|
||||
return None, content
|
||||
|
||||
|
||||
def detect_image_mime(data: bytes) -> str | None:
|
||||
"""Detect image MIME type from magic bytes, ignoring file extension."""
|
||||
if data[:8] == b"\x89PNG\r\n\x1a\n":
|
||||
|
||||
@ -13,6 +13,17 @@ def _ctx() -> AgentHookContext:
|
||||
return AgentHookContext(iteration=0, messages=[])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Base AgentHook emit_reasoning: no-op
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_base_hook_emit_reasoning_is_noop():
|
||||
hook = AgentHook()
|
||||
await hook.emit_reasoning("should not raise")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fan-out: every hook is called in order
|
||||
# ---------------------------------------------------------------------------
|
||||
@ -45,6 +56,9 @@ async def test_composite_fans_out_all_async_methods():
|
||||
async def before_iteration(self, context: AgentHookContext) -> None:
|
||||
events.append("before_iteration")
|
||||
|
||||
async def emit_reasoning(self, reasoning_content: str | None) -> None:
|
||||
events.append(f"emit_reasoning:{reasoning_content}")
|
||||
|
||||
async def on_stream(self, context: AgentHookContext, delta: str) -> None:
|
||||
events.append(f"on_stream:{delta}")
|
||||
|
||||
@ -61,6 +75,7 @@ async def test_composite_fans_out_all_async_methods():
|
||||
ctx = _ctx()
|
||||
|
||||
await hook.before_iteration(ctx)
|
||||
await hook.emit_reasoning("thinking...")
|
||||
await hook.on_stream(ctx, "hi")
|
||||
await hook.on_stream_end(ctx, resuming=True)
|
||||
await hook.before_execute_tools(ctx)
|
||||
@ -68,6 +83,7 @@ async def test_composite_fans_out_all_async_methods():
|
||||
|
||||
assert events == [
|
||||
"before_iteration", "before_iteration",
|
||||
"emit_reasoning:thinking...", "emit_reasoning:thinking...",
|
||||
"on_stream:hi", "on_stream:hi",
|
||||
"on_stream_end:True", "on_stream_end:True",
|
||||
"before_execute_tools", "before_execute_tools",
|
||||
@ -120,6 +136,8 @@ async def test_composite_error_isolation_all_async():
|
||||
calls: list[str] = []
|
||||
|
||||
class Bad(AgentHook):
|
||||
async def emit_reasoning(self, reasoning_content):
|
||||
raise RuntimeError("err")
|
||||
async def on_stream_end(self, context, *, resuming):
|
||||
raise RuntimeError("err")
|
||||
async def before_execute_tools(self, context):
|
||||
@ -128,6 +146,8 @@ async def test_composite_error_isolation_all_async():
|
||||
raise RuntimeError("err")
|
||||
|
||||
class Good(AgentHook):
|
||||
async def emit_reasoning(self, reasoning_content):
|
||||
calls.append("emit_reasoning")
|
||||
async def on_stream_end(self, context, *, resuming):
|
||||
calls.append("on_stream_end")
|
||||
async def before_execute_tools(self, context):
|
||||
@ -137,10 +157,11 @@ async def test_composite_error_isolation_all_async():
|
||||
|
||||
hook = CompositeHook([Bad(), Good()])
|
||||
ctx = _ctx()
|
||||
await hook.emit_reasoning("test")
|
||||
await hook.on_stream_end(ctx, resuming=False)
|
||||
await hook.before_execute_tools(ctx)
|
||||
await hook.after_iteration(ctx)
|
||||
assert calls == ["on_stream_end", "before_execute_tools", "after_iteration"]
|
||||
assert calls == ["emit_reasoning", "on_stream_end", "before_execute_tools", "after_iteration"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
321
tests/agent/test_runner_reasoning.py
Normal file
321
tests/agent/test_runner_reasoning.py
Normal file
@ -0,0 +1,321 @@
|
||||
"""Tests for AgentRunner reasoning extraction and emission.
|
||||
|
||||
Covers the three sources of model reasoning (dedicated ``reasoning_content``,
|
||||
Anthropic ``thinking_blocks``, inline ``<think>``/``<thought>`` tags) plus
|
||||
the streaming interaction: reasoning and answer streams are independent
|
||||
channels, gated by ``context.streamed_reasoning`` rather than
|
||||
``context.streamed_content``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from nanobot.agent.hook import AgentHook
|
||||
from nanobot.config.schema import AgentDefaults
|
||||
from nanobot.providers.base import LLMResponse, ToolCallRequest
|
||||
|
||||
_MAX_TOOL_RESULT_CHARS = AgentDefaults().max_tool_result_chars
|
||||
|
||||
|
||||
class _RecordingHook(AgentHook):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.emitted: list[str] = []
|
||||
self.end_calls = 0
|
||||
|
||||
async def emit_reasoning(self, reasoning_content: str | None) -> None:
|
||||
if reasoning_content:
|
||||
self.emitted.append(reasoning_content)
|
||||
|
||||
async def emit_reasoning_end(self) -> None:
|
||||
self.end_calls += 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_runner_preserves_reasoning_fields_in_assistant_history():
|
||||
"""Reasoning fields ride along on the persisted assistant message so
|
||||
follow-up provider calls retain the model's prior thinking context."""
|
||||
from nanobot.agent.runner import AgentRunSpec, AgentRunner
|
||||
|
||||
provider = MagicMock()
|
||||
captured_second_call: list[dict] = []
|
||||
call_count = {"n": 0}
|
||||
|
||||
async def chat_with_retry(*, messages, **kwargs):
|
||||
call_count["n"] += 1
|
||||
if call_count["n"] == 1:
|
||||
return LLMResponse(
|
||||
content="thinking",
|
||||
tool_calls=[ToolCallRequest(id="call_1", name="list_dir", arguments={"path": "."})],
|
||||
reasoning_content="hidden reasoning",
|
||||
thinking_blocks=[{"type": "thinking", "thinking": "step"}],
|
||||
usage={"prompt_tokens": 5, "completion_tokens": 3},
|
||||
)
|
||||
captured_second_call[:] = messages
|
||||
return LLMResponse(content="done", tool_calls=[], usage={})
|
||||
|
||||
provider.chat_with_retry = chat_with_retry
|
||||
tools = MagicMock()
|
||||
tools.get_definitions.return_value = []
|
||||
tools.execute = AsyncMock(return_value="tool result")
|
||||
|
||||
runner = AgentRunner(provider)
|
||||
result = await runner.run(AgentRunSpec(
|
||||
initial_messages=[
|
||||
{"role": "system", "content": "system"},
|
||||
{"role": "user", "content": "do task"},
|
||||
],
|
||||
tools=tools,
|
||||
model="test-model",
|
||||
max_iterations=3,
|
||||
max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
|
||||
))
|
||||
|
||||
assert result.final_content == "done"
|
||||
assistant_messages = [
|
||||
msg for msg in captured_second_call
|
||||
if msg.get("role") == "assistant" and msg.get("tool_calls")
|
||||
]
|
||||
assert len(assistant_messages) == 1
|
||||
assert assistant_messages[0]["reasoning_content"] == "hidden reasoning"
|
||||
assert assistant_messages[0]["thinking_blocks"] == [{"type": "thinking", "thinking": "step"}]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_runner_emits_anthropic_thinking_blocks():
|
||||
from nanobot.agent.runner import AgentRunSpec, AgentRunner
|
||||
|
||||
provider = MagicMock()
|
||||
|
||||
async def chat_with_retry(**kwargs):
|
||||
return LLMResponse(
|
||||
content="The answer is 42.",
|
||||
thinking_blocks=[
|
||||
{"type": "thinking", "thinking": "Let me analyze this step by step.", "signature": "sig1"},
|
||||
{"type": "thinking", "thinking": "After careful consideration.", "signature": "sig2"},
|
||||
],
|
||||
tool_calls=[],
|
||||
usage={"prompt_tokens": 5, "completion_tokens": 3},
|
||||
)
|
||||
|
||||
provider.chat_with_retry = chat_with_retry
|
||||
tools = MagicMock()
|
||||
tools.get_definitions.return_value = []
|
||||
|
||||
hook = _RecordingHook()
|
||||
runner = AgentRunner(provider)
|
||||
result = await runner.run(AgentRunSpec(
|
||||
initial_messages=[{"role": "user", "content": "question"}],
|
||||
tools=tools,
|
||||
model="test-model",
|
||||
max_iterations=3,
|
||||
max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
|
||||
hook=hook,
|
||||
))
|
||||
|
||||
assert result.final_content == "The answer is 42."
|
||||
assert len(hook.emitted) == 1
|
||||
assert "Let me analyze this" in hook.emitted[0]
|
||||
assert "After careful consideration" in hook.emitted[0]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_runner_emits_inline_think_content_as_reasoning():
|
||||
"""Models embedding reasoning in <think>...</think> blocks should have
|
||||
that content extracted and emitted, and stripped from the answer."""
|
||||
from nanobot.agent.runner import AgentRunSpec, AgentRunner
|
||||
|
||||
provider = MagicMock()
|
||||
|
||||
async def chat_with_retry(**kwargs):
|
||||
return LLMResponse(
|
||||
content="<think>Let me think about this...\nThe answer is 42.</think>The answer is 42.",
|
||||
tool_calls=[],
|
||||
usage={"prompt_tokens": 5, "completion_tokens": 3},
|
||||
)
|
||||
|
||||
provider.chat_with_retry = chat_with_retry
|
||||
tools = MagicMock()
|
||||
tools.get_definitions.return_value = []
|
||||
|
||||
hook = _RecordingHook()
|
||||
runner = AgentRunner(provider)
|
||||
result = await runner.run(AgentRunSpec(
|
||||
initial_messages=[{"role": "user", "content": "what is the answer?"}],
|
||||
tools=tools,
|
||||
model="test-model",
|
||||
max_iterations=3,
|
||||
max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
|
||||
hook=hook,
|
||||
))
|
||||
|
||||
assert result.final_content == "The answer is 42."
|
||||
assert len(hook.emitted) == 1
|
||||
assert "Let me think about this" in hook.emitted[0]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_runner_prefers_reasoning_content_over_inline_think():
|
||||
"""Fallback priority: dedicated reasoning_content wins; inline <think>
|
||||
is still scrubbed from the answer content."""
|
||||
from nanobot.agent.runner import AgentRunSpec, AgentRunner
|
||||
|
||||
provider = MagicMock()
|
||||
|
||||
async def chat_with_retry(**kwargs):
|
||||
return LLMResponse(
|
||||
content="<think>inline thinking</think>The answer.",
|
||||
reasoning_content="dedicated reasoning field",
|
||||
tool_calls=[],
|
||||
usage={"prompt_tokens": 5, "completion_tokens": 3},
|
||||
)
|
||||
|
||||
provider.chat_with_retry = chat_with_retry
|
||||
tools = MagicMock()
|
||||
tools.get_definitions.return_value = []
|
||||
|
||||
hook = _RecordingHook()
|
||||
runner = AgentRunner(provider)
|
||||
result = await runner.run(AgentRunSpec(
|
||||
initial_messages=[{"role": "user", "content": "question"}],
|
||||
tools=tools,
|
||||
model="test-model",
|
||||
max_iterations=3,
|
||||
max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
|
||||
hook=hook,
|
||||
))
|
||||
|
||||
assert result.final_content == "The answer."
|
||||
assert hook.emitted == ["dedicated reasoning field"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_runner_emits_reasoning_content_even_when_answer_was_streamed():
|
||||
"""`reasoning_content` arrives only on the final response; streaming the
|
||||
answer must not suppress it (the answer stream and the reasoning channel
|
||||
are independent — only the reasoning-already-emitted bit matters)."""
|
||||
from nanobot.agent.runner import AgentRunSpec, AgentRunner
|
||||
|
||||
provider = MagicMock()
|
||||
provider.supports_progress_deltas = True
|
||||
|
||||
async def chat_stream_with_retry(*, on_content_delta=None, **kwargs):
|
||||
if on_content_delta:
|
||||
await on_content_delta("The ")
|
||||
await on_content_delta("answer.")
|
||||
return LLMResponse(
|
||||
content="The answer.",
|
||||
reasoning_content="step-by-step deduction",
|
||||
tool_calls=[],
|
||||
usage={"prompt_tokens": 5, "completion_tokens": 3},
|
||||
)
|
||||
|
||||
provider.chat_stream_with_retry = chat_stream_with_retry
|
||||
tools = MagicMock()
|
||||
tools.get_definitions.return_value = []
|
||||
|
||||
progress_calls: list[str] = []
|
||||
|
||||
async def _progress(content: str, **_kwargs):
|
||||
progress_calls.append(content)
|
||||
|
||||
hook = _RecordingHook()
|
||||
runner = AgentRunner(provider)
|
||||
result = await runner.run(AgentRunSpec(
|
||||
initial_messages=[{"role": "user", "content": "question"}],
|
||||
tools=tools,
|
||||
model="test-model",
|
||||
max_iterations=3,
|
||||
max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
|
||||
hook=hook,
|
||||
stream_progress_deltas=True,
|
||||
progress_callback=_progress,
|
||||
))
|
||||
|
||||
assert result.final_content == "The answer."
|
||||
assert progress_calls, "answer should have streamed via progress callback"
|
||||
assert hook.emitted == ["step-by-step deduction"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_runner_does_not_double_emit_when_inline_think_already_streamed():
|
||||
"""Inline `<think>` blocks streamed incrementally during the answer
|
||||
stream must not be re-emitted from the final response."""
|
||||
from nanobot.agent.runner import AgentRunSpec, AgentRunner
|
||||
|
||||
provider = MagicMock()
|
||||
provider.supports_progress_deltas = True
|
||||
|
||||
async def chat_stream_with_retry(*, on_content_delta=None, **kwargs):
|
||||
if on_content_delta:
|
||||
await on_content_delta("<think>working...</think>")
|
||||
await on_content_delta("The answer.")
|
||||
return LLMResponse(
|
||||
content="<think>working...</think>The answer.",
|
||||
tool_calls=[],
|
||||
usage={"prompt_tokens": 5, "completion_tokens": 3},
|
||||
)
|
||||
|
||||
provider.chat_stream_with_retry = chat_stream_with_retry
|
||||
tools = MagicMock()
|
||||
tools.get_definitions.return_value = []
|
||||
|
||||
async def _progress(content: str, **_kwargs):
|
||||
pass
|
||||
|
||||
hook = _RecordingHook()
|
||||
runner = AgentRunner(provider)
|
||||
result = await runner.run(AgentRunSpec(
|
||||
initial_messages=[{"role": "user", "content": "question"}],
|
||||
tools=tools,
|
||||
model="test-model",
|
||||
max_iterations=3,
|
||||
max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
|
||||
hook=hook,
|
||||
stream_progress_deltas=True,
|
||||
progress_callback=_progress,
|
||||
))
|
||||
|
||||
assert result.final_content == "The answer."
|
||||
assert hook.emitted == ["working..."]
|
||||
assert hook.end_calls >= 1, "reasoning stream must be closed once the answer starts"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_runner_closes_reasoning_stream_after_one_shot_response():
|
||||
"""A non-streaming response carrying ``reasoning_content`` must emit
|
||||
both a reasoning delta and an end marker so channels can finalize the
|
||||
in-place bubble."""
|
||||
from nanobot.agent.runner import AgentRunSpec, AgentRunner
|
||||
|
||||
provider = MagicMock()
|
||||
|
||||
async def chat_with_retry(**kwargs):
|
||||
return LLMResponse(
|
||||
content="answer",
|
||||
reasoning_content="hidden thought",
|
||||
tool_calls=[],
|
||||
usage={"prompt_tokens": 5, "completion_tokens": 3},
|
||||
)
|
||||
|
||||
provider.chat_with_retry = chat_with_retry
|
||||
tools = MagicMock()
|
||||
tools.get_definitions.return_value = []
|
||||
|
||||
hook = _RecordingHook()
|
||||
runner = AgentRunner(provider)
|
||||
result = await runner.run(AgentRunSpec(
|
||||
initial_messages=[{"role": "user", "content": "q"}],
|
||||
tools=tools,
|
||||
model="test-model",
|
||||
max_iterations=3,
|
||||
max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
|
||||
hook=hook,
|
||||
))
|
||||
|
||||
assert result.final_content == "answer"
|
||||
assert hook.emitted == ["hidden thought"]
|
||||
assert hook.end_calls == 1
|
||||
228
tests/channels/test_channel_manager_reasoning.py
Normal file
228
tests/channels/test_channel_manager_reasoning.py
Normal file
@ -0,0 +1,228 @@
|
||||
"""Tests for ChannelManager routing of model reasoning content.
|
||||
|
||||
Reasoning is delivered through plugin streaming primitives
|
||||
(``send_reasoning_delta`` / ``send_reasoning_end``) so each channel
|
||||
controls in-place rendering — mirroring the existing answer ``send_delta``
|
||||
/ ``stream_end`` pair. The manager forwards reasoning frames only to
|
||||
channels that opt in via ``channel.show_reasoning``; plugins without a
|
||||
low-emphasis UI primitive keep the base no-op and the content silently
|
||||
drops at dispatch.
|
||||
|
||||
One-shot ``_reasoning`` frames are accepted for back-compat with hooks
|
||||
that haven't migrated yet — ``BaseChannel.send_reasoning`` expands them
|
||||
to a single delta + end pair so plugins only implement the streaming
|
||||
primitives.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import pytest
|
||||
|
||||
from nanobot.bus.events import OutboundMessage
|
||||
from nanobot.bus.queue import MessageBus
|
||||
from nanobot.channels.base import BaseChannel
|
||||
from nanobot.channels.manager import ChannelManager
|
||||
from nanobot.config.schema import Config
|
||||
|
||||
|
||||
class _MockChannel(BaseChannel):
|
||||
name = "mock"
|
||||
display_name = "Mock"
|
||||
|
||||
def __init__(self, config, bus):
|
||||
super().__init__(config, bus)
|
||||
self._send_mock = AsyncMock()
|
||||
self._delta_mock = AsyncMock()
|
||||
self._end_mock = AsyncMock()
|
||||
|
||||
async def start(self): # pragma: no cover - not exercised
|
||||
pass
|
||||
|
||||
async def stop(self): # pragma: no cover - not exercised
|
||||
pass
|
||||
|
||||
async def send(self, msg):
|
||||
return await self._send_mock(msg)
|
||||
|
||||
async def send_reasoning_delta(self, chat_id, delta, metadata=None):
|
||||
return await self._delta_mock(chat_id, delta, metadata)
|
||||
|
||||
async def send_reasoning_end(self, chat_id, metadata=None):
|
||||
return await self._end_mock(chat_id, metadata)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def manager() -> ChannelManager:
|
||||
mgr = ChannelManager(Config(), MessageBus())
|
||||
mgr.channels["mock"] = _MockChannel({}, mgr.bus)
|
||||
return mgr
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_reasoning_delta_routes_to_send_reasoning_delta(manager):
|
||||
channel = manager.channels["mock"]
|
||||
msg = OutboundMessage(
|
||||
channel="mock",
|
||||
chat_id="c1",
|
||||
content="step-by-step",
|
||||
metadata={"_progress": True, "_reasoning_delta": True, "_stream_id": "r1"},
|
||||
)
|
||||
await manager._send_once(channel, msg)
|
||||
channel._delta_mock.assert_awaited_once()
|
||||
args = channel._delta_mock.await_args.args
|
||||
assert args[0] == "c1"
|
||||
assert args[1] == "step-by-step"
|
||||
channel._send_mock.assert_not_awaited()
|
||||
channel._end_mock.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_reasoning_end_routes_to_send_reasoning_end(manager):
|
||||
channel = manager.channels["mock"]
|
||||
msg = OutboundMessage(
|
||||
channel="mock",
|
||||
chat_id="c1",
|
||||
content="",
|
||||
metadata={"_progress": True, "_reasoning_end": True, "_stream_id": "r1"},
|
||||
)
|
||||
await manager._send_once(channel, msg)
|
||||
channel._end_mock.assert_awaited_once()
|
||||
channel._delta_mock.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_legacy_one_shot_reasoning_expands_to_delta_plus_end(manager):
|
||||
"""`_reasoning` (no delta/end pair) falls back through `send_reasoning`
|
||||
which the base class expands to a single delta + end. Hooks that haven't
|
||||
migrated still surface in WebUI as a complete stream segment."""
|
||||
channel = manager.channels["mock"]
|
||||
msg = OutboundMessage(
|
||||
channel="mock",
|
||||
chat_id="c1",
|
||||
content="one-shot reasoning",
|
||||
metadata={"_progress": True, "_reasoning": True},
|
||||
)
|
||||
await manager._send_once(channel, msg)
|
||||
channel._delta_mock.assert_awaited_once()
|
||||
channel._end_mock.assert_awaited_once()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_dispatch_drops_reasoning_when_channel_opts_out(manager):
|
||||
channel = manager.channels["mock"]
|
||||
channel.show_reasoning = False
|
||||
msg = OutboundMessage(
|
||||
channel="mock",
|
||||
chat_id="c1",
|
||||
content="hidden thinking",
|
||||
metadata={"_progress": True, "_reasoning_delta": True},
|
||||
)
|
||||
await manager.bus.publish_outbound(msg)
|
||||
|
||||
await _pump_one(manager)
|
||||
|
||||
channel._delta_mock.assert_not_awaited()
|
||||
channel._end_mock.assert_not_awaited()
|
||||
channel._send_mock.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_dispatch_delivers_reasoning_when_channel_opts_in(manager):
|
||||
channel = manager.channels["mock"]
|
||||
channel.show_reasoning = True
|
||||
for chunk in ("first ", "second"):
|
||||
await manager.bus.publish_outbound(OutboundMessage(
|
||||
channel="mock",
|
||||
chat_id="c1",
|
||||
content=chunk,
|
||||
metadata={"_progress": True, "_reasoning_delta": True, "_stream_id": "r1"},
|
||||
))
|
||||
await manager.bus.publish_outbound(OutboundMessage(
|
||||
channel="mock",
|
||||
chat_id="c1",
|
||||
content="",
|
||||
metadata={"_progress": True, "_reasoning_end": True, "_stream_id": "r1"},
|
||||
))
|
||||
|
||||
await _pump_one(manager)
|
||||
|
||||
assert channel._delta_mock.await_count == 2
|
||||
channel._end_mock.assert_awaited_once()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_dispatch_silently_drops_reasoning_for_unknown_channel(manager):
|
||||
msg = OutboundMessage(
|
||||
channel="ghost",
|
||||
chat_id="c1",
|
||||
content="nobody home",
|
||||
metadata={"_progress": True, "_reasoning_delta": True},
|
||||
)
|
||||
await manager.bus.publish_outbound(msg)
|
||||
|
||||
await _pump_one(manager)
|
||||
|
||||
manager.channels["mock"]._delta_mock.assert_not_awaited()
|
||||
manager.channels["mock"]._send_mock.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_base_channel_reasoning_primitives_are_noop_safe():
|
||||
"""Plugins that don't override the streaming primitives must not blow up."""
|
||||
|
||||
class _Plain(BaseChannel):
|
||||
name = "plain"
|
||||
display_name = "Plain"
|
||||
|
||||
async def start(self): # pragma: no cover
|
||||
pass
|
||||
|
||||
async def stop(self): # pragma: no cover
|
||||
pass
|
||||
|
||||
async def send(self, msg): # pragma: no cover
|
||||
pass
|
||||
|
||||
channel = _Plain({}, MessageBus())
|
||||
assert await channel.send_reasoning_delta("c", "x") is None
|
||||
assert await channel.send_reasoning_end("c") is None
|
||||
# And the one-shot wrapper translates without raising.
|
||||
assert await channel.send_reasoning(
|
||||
OutboundMessage(channel="plain", chat_id="c", content="x", metadata={})
|
||||
) is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_reasoning_routing_does_not_consult_send_progress(manager):
|
||||
"""`show_reasoning` is orthogonal to `send_progress` — turning off
|
||||
progress streaming must not silence reasoning."""
|
||||
channel = manager.channels["mock"]
|
||||
channel.send_progress = False
|
||||
channel.show_reasoning = True
|
||||
await manager.bus.publish_outbound(OutboundMessage(
|
||||
channel="mock",
|
||||
chat_id="c1",
|
||||
content="still surfaces",
|
||||
metadata={"_progress": True, "_reasoning_delta": True},
|
||||
))
|
||||
|
||||
await _pump_one(manager)
|
||||
|
||||
channel._delta_mock.assert_awaited_once()
|
||||
|
||||
|
||||
async def _pump_one(manager: ChannelManager) -> None:
|
||||
"""Drive the dispatcher until the outbound queue drains, then cancel."""
|
||||
task = asyncio.create_task(manager._dispatch_outbound())
|
||||
for _ in range(50):
|
||||
await asyncio.sleep(0.01)
|
||||
if manager.bus.outbound.qsize() == 0:
|
||||
break
|
||||
task.cancel()
|
||||
try:
|
||||
await task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
@ -358,6 +358,87 @@ async def test_send_delta_emits_delta_and_stream_end() -> None:
|
||||
assert second["stream_id"] == "sid"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_reasoning_delta_emits_streaming_frame() -> None:
|
||||
bus = MagicMock()
|
||||
channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus)
|
||||
mock_ws = AsyncMock()
|
||||
channel._attach(mock_ws, "chat-1")
|
||||
|
||||
await channel.send_reasoning_delta(
|
||||
"chat-1",
|
||||
"step-by-step thinking",
|
||||
{"_reasoning_delta": True, "_stream_id": "r1"},
|
||||
)
|
||||
|
||||
mock_ws.send.assert_awaited_once()
|
||||
payload = json.loads(mock_ws.send.await_args.args[0])
|
||||
assert payload["event"] == "reasoning_delta"
|
||||
assert payload["chat_id"] == "chat-1"
|
||||
assert payload["text"] == "step-by-step thinking"
|
||||
assert payload["stream_id"] == "r1"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_reasoning_end_emits_close_frame() -> None:
|
||||
bus = MagicMock()
|
||||
channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus)
|
||||
mock_ws = AsyncMock()
|
||||
channel._attach(mock_ws, "chat-1")
|
||||
|
||||
await channel.send_reasoning_end("chat-1", {"_reasoning_end": True, "_stream_id": "r1"})
|
||||
|
||||
payload = json.loads(mock_ws.send.await_args.args[0])
|
||||
assert payload == {"event": "reasoning_end", "chat_id": "chat-1", "stream_id": "r1"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_reasoning_one_shot_expands_to_delta_plus_end() -> None:
|
||||
"""``send_reasoning`` is back-compat for hooks that haven't migrated:
|
||||
the base implementation must produce one delta and one end so the
|
||||
WebUI sees the same shape either way."""
|
||||
bus = MagicMock()
|
||||
channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus)
|
||||
mock_ws = AsyncMock()
|
||||
channel._attach(mock_ws, "chat-1")
|
||||
|
||||
await channel.send_reasoning(OutboundMessage(
|
||||
channel="websocket",
|
||||
chat_id="chat-1",
|
||||
content="thinking",
|
||||
metadata={"_reasoning": True},
|
||||
))
|
||||
|
||||
assert mock_ws.send.await_count == 2
|
||||
first = json.loads(mock_ws.send.call_args_list[0][0][0])
|
||||
second = json.loads(mock_ws.send.call_args_list[1][0][0])
|
||||
assert first["event"] == "reasoning_delta"
|
||||
assert first["text"] == "thinking"
|
||||
assert second["event"] == "reasoning_end"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_reasoning_delta_drops_empty_chunks() -> None:
|
||||
bus = MagicMock()
|
||||
channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus)
|
||||
mock_ws = AsyncMock()
|
||||
channel._attach(mock_ws, "chat-1")
|
||||
|
||||
await channel.send_reasoning_delta("chat-1", "", {"_reasoning_delta": True})
|
||||
|
||||
mock_ws.send.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_reasoning_without_subscribers_is_noop() -> None:
|
||||
bus = MagicMock()
|
||||
channel = WebSocketChannel({"enabled": True, "allowFrom": ["*"]}, bus)
|
||||
|
||||
await channel.send_reasoning_delta("unattached", "thinking", None)
|
||||
await channel.send_reasoning_end("unattached", None)
|
||||
# No subscribers, no exception, no send.
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_turn_end_emits_turn_end_event() -> None:
|
||||
bus = MagicMock()
|
||||
|
||||
@ -1,4 +1,6 @@
|
||||
import asyncio
|
||||
from contextlib import nullcontext
|
||||
from io import StringIO
|
||||
from unittest.mock import AsyncMock, MagicMock, call, patch
|
||||
|
||||
import pytest
|
||||
@ -96,6 +98,66 @@ def test_print_cli_progress_line_pauses_spinner_before_printing():
|
||||
assert order == ["start", "stop", "print", "start", "stop"]
|
||||
|
||||
|
||||
def test_thinking_spinner_clears_status_line_when_paused():
|
||||
"""Stopping the spinner should erase its transient line before output."""
|
||||
stream = StringIO()
|
||||
stream.isatty = lambda: True # type: ignore[method-assign]
|
||||
mock_console = MagicMock()
|
||||
mock_console.file = stream
|
||||
spinner = MagicMock()
|
||||
mock_console.status.return_value = spinner
|
||||
|
||||
thinking = stream_mod.ThinkingSpinner(console=mock_console)
|
||||
with thinking:
|
||||
with thinking.pause():
|
||||
pass
|
||||
|
||||
assert "\r\x1b[2K" in stream.getvalue()
|
||||
|
||||
|
||||
def test_stream_renderer_stops_spinner_even_after_header_printed():
|
||||
"""A later answer delta must stop the spinner even when header already exists."""
|
||||
stream = StringIO()
|
||||
stream.isatty = lambda: True # type: ignore[method-assign]
|
||||
mock_console = MagicMock()
|
||||
mock_console.file = stream
|
||||
spinner = MagicMock()
|
||||
mock_console.status.return_value = spinner
|
||||
|
||||
with patch.object(stream_mod, "_make_console", return_value=mock_console):
|
||||
renderer = stream_mod.StreamRenderer(show_spinner=True)
|
||||
renderer._header_printed = True
|
||||
renderer.ensure_header()
|
||||
|
||||
spinner.stop.assert_called_once()
|
||||
assert "\r\x1b[2K" in stream.getvalue()
|
||||
|
||||
|
||||
def test_print_cli_progress_line_opens_renderer_header_before_trace():
|
||||
"""Trace lines should appear under the assistant header, not under You."""
|
||||
order: list[str] = []
|
||||
renderer = MagicMock()
|
||||
renderer.console.print.side_effect = lambda *_args, **_kwargs: order.append("print")
|
||||
renderer.ensure_header.side_effect = lambda: order.append("header")
|
||||
renderer.pause_spinner.return_value = nullcontext()
|
||||
|
||||
commands._print_cli_progress_line("tool running", None, renderer)
|
||||
|
||||
assert order == ["header", "print"]
|
||||
|
||||
|
||||
def test_print_cli_progress_line_stops_live_before_trace():
|
||||
"""A trace line should not leak the current transient Live frame."""
|
||||
mock_live = MagicMock()
|
||||
renderer = stream_mod.StreamRenderer(show_spinner=False)
|
||||
renderer._live = mock_live
|
||||
|
||||
commands._print_cli_progress_line("tool running", None, renderer)
|
||||
|
||||
mock_live.stop.assert_called_once()
|
||||
assert renderer._live is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_print_interactive_progress_line_pauses_spinner_before_printing():
|
||||
"""Interactive progress output should also pause spinner cleanly."""
|
||||
@ -156,17 +218,65 @@ def test_stream_renderer_stop_for_input_stops_spinner():
|
||||
# Create renderer with mocked console
|
||||
with patch.object(stream_mod, "_make_console", return_value=mock_console):
|
||||
renderer = stream_mod.StreamRenderer(show_spinner=True)
|
||||
|
||||
|
||||
# Verify spinner started
|
||||
spinner.start.assert_called_once()
|
||||
|
||||
|
||||
# Stop for input
|
||||
renderer.stop_for_input()
|
||||
|
||||
|
||||
# Verify spinner stopped
|
||||
spinner.stop.assert_called_once()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_on_end_writes_final_content_to_stdout_after_stopping_live():
|
||||
"""on_end should stop Live (transient erases it) then print final content to stdout."""
|
||||
mock_live = MagicMock()
|
||||
mock_console = MagicMock()
|
||||
mock_console.capture.return_value.__enter__ = MagicMock(
|
||||
return_value=MagicMock(get=lambda: "final output\n")
|
||||
)
|
||||
mock_console.capture.return_value.__exit__ = MagicMock(return_value=False)
|
||||
|
||||
with patch.object(stream_mod, "_make_console", return_value=mock_console):
|
||||
renderer = stream_mod.StreamRenderer(show_spinner=False)
|
||||
renderer._live = mock_live
|
||||
renderer._buf = "final output"
|
||||
|
||||
written: list[str] = []
|
||||
with patch("sys.stdout") as mock_stdout:
|
||||
mock_stdout.write = lambda s: written.append(s)
|
||||
mock_stdout.flush = MagicMock()
|
||||
await renderer.on_end()
|
||||
|
||||
mock_live.stop.assert_called_once()
|
||||
assert renderer._live is None
|
||||
assert written == ["final output\n"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_on_end_resuming_clears_buffer_and_restarts_spinner():
|
||||
"""on_end(resuming=True) should reset state for the next iteration."""
|
||||
spinner = MagicMock()
|
||||
mock_console = MagicMock()
|
||||
mock_console.status.return_value = spinner
|
||||
mock_console.capture.return_value.__enter__ = MagicMock(
|
||||
return_value=MagicMock(get=lambda: "")
|
||||
)
|
||||
mock_console.capture.return_value.__exit__ = MagicMock(return_value=False)
|
||||
|
||||
with patch.object(stream_mod, "_make_console", return_value=mock_console):
|
||||
renderer = stream_mod.StreamRenderer(show_spinner=True)
|
||||
renderer._buf = "some content"
|
||||
|
||||
await renderer.on_end(resuming=True)
|
||||
|
||||
assert renderer._buf == ""
|
||||
# Spinner should have been restarted (start called twice: __init__ + resuming)
|
||||
assert spinner.start.call_count == 2
|
||||
|
||||
|
||||
def test_make_console_force_terminal_when_stdout_is_tty():
|
||||
"""Console should set force_terminal=True when stdout is a TTY (rich output)."""
|
||||
import sys
|
||||
|
||||
@ -17,7 +17,7 @@ async def test_interactive_retry_wait_is_rendered_as_progress_even_when_progress
|
||||
metadata={"_retry_wait": True},
|
||||
)
|
||||
|
||||
async def fake_print(text: str, active_thinking: object | None) -> None:
|
||||
async def fake_print(text: str, active_thinking: object | None, renderer=None) -> None:
|
||||
calls.append((text, active_thinking))
|
||||
|
||||
with patch("nanobot.cli.commands._print_interactive_progress_line", side_effect=fake_print):
|
||||
@ -29,3 +29,104 @@ async def test_interactive_retry_wait_is_rendered_as_progress_even_when_progress
|
||||
|
||||
assert handled is True
|
||||
assert calls == [("Model request failed, retry in 2s (attempt 1).", thinking)]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_reasoning_displayed_when_show_reasoning_enabled():
|
||||
"""Reasoning content should be displayed when show_reasoning is True."""
|
||||
calls: list[str] = []
|
||||
channels_config = SimpleNamespace(
|
||||
send_progress=True, send_tool_hints=False, show_reasoning=True,
|
||||
)
|
||||
msg = SimpleNamespace(
|
||||
content="Let me think about this...",
|
||||
metadata={"_progress": True, "_reasoning": True},
|
||||
)
|
||||
|
||||
with patch("nanobot.cli.commands._print_cli_reasoning", side_effect=lambda t, th, r=None: calls.append(t)):
|
||||
handled = await commands._maybe_print_interactive_progress(msg, None, channels_config)
|
||||
|
||||
assert handled is True
|
||||
assert calls == ["Let me think about this..."]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_reasoning_delta_displayed_when_show_reasoning_enabled():
|
||||
"""Streamed reasoning delta frames should use the reasoning renderer."""
|
||||
calls: list[str] = []
|
||||
channels_config = SimpleNamespace(
|
||||
send_progress=True, send_tool_hints=False, show_reasoning=True,
|
||||
)
|
||||
msg = SimpleNamespace(
|
||||
content="I should search first.",
|
||||
metadata={"_progress": True, "_reasoning_delta": True},
|
||||
)
|
||||
|
||||
with patch("nanobot.cli.commands._print_cli_reasoning", side_effect=lambda t, th, r=None: calls.append(t)):
|
||||
handled = await commands._maybe_print_interactive_progress(msg, None, channels_config)
|
||||
|
||||
assert handled is True
|
||||
assert calls == ["I should search first."]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_reasoning_hidden_when_show_reasoning_disabled():
|
||||
"""Reasoning content should be suppressed when show_reasoning is False."""
|
||||
channels_config = SimpleNamespace(
|
||||
send_progress=True, send_tool_hints=False, show_reasoning=False,
|
||||
)
|
||||
msg = SimpleNamespace(
|
||||
content="Let me think about this...",
|
||||
metadata={"_progress": True, "_reasoning": True},
|
||||
)
|
||||
|
||||
with patch("nanobot.cli.commands._print_cli_reasoning") as mock_reasoning:
|
||||
handled = await commands._maybe_print_interactive_progress(msg, None, channels_config)
|
||||
|
||||
assert handled is True
|
||||
mock_reasoning.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_non_reasoning_progress_not_affected_by_show_reasoning():
|
||||
"""Regular progress lines should display regardless of show_reasoning."""
|
||||
calls: list[str] = []
|
||||
channels_config = SimpleNamespace(
|
||||
send_progress=True, send_tool_hints=False, show_reasoning=False,
|
||||
)
|
||||
msg = SimpleNamespace(
|
||||
content="working on it...",
|
||||
metadata={"_progress": True},
|
||||
)
|
||||
|
||||
async def fake_print(text: str, thinking=None, renderer=None):
|
||||
calls.append(text)
|
||||
|
||||
with patch("nanobot.cli.commands._print_interactive_progress_line", side_effect=fake_print):
|
||||
handled = await commands._maybe_print_interactive_progress(msg, None, channels_config)
|
||||
|
||||
assert handled is True
|
||||
assert calls == ["working on it..."]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_reasoning_shown_when_send_progress_disabled():
|
||||
"""Reasoning display is governed by `show_reasoning` alone, independent
|
||||
of `send_progress` — the two knobs are orthogonal."""
|
||||
calls: list[str] = []
|
||||
channels_config = SimpleNamespace(
|
||||
send_progress=False, send_tool_hints=False, show_reasoning=True,
|
||||
)
|
||||
msg = SimpleNamespace(
|
||||
content="Let me think about this...",
|
||||
metadata={"_progress": True, "_reasoning": True},
|
||||
)
|
||||
|
||||
with patch(
|
||||
"nanobot.cli.commands._print_cli_reasoning",
|
||||
side_effect=lambda t, th, r=None: calls.append(t),
|
||||
):
|
||||
handled = await commands._maybe_print_interactive_progress(msg, None, channels_config)
|
||||
|
||||
assert handled is True
|
||||
assert calls == ["Let me think about this..."]
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
from nanobot.utils.helpers import strip_think
|
||||
from nanobot.utils.helpers import extract_reasoning, extract_think, strip_think
|
||||
|
||||
|
||||
class TestStripThinkTag:
|
||||
@ -144,3 +144,130 @@ class TestStripThinkConservativePreserve:
|
||||
def test_literal_channel_marker_in_code_block_preserved(self):
|
||||
text = "Example:\n```\nif line.startswith('<channel|>'):\n skip()\n```"
|
||||
assert strip_think(text) == text
|
||||
|
||||
|
||||
class TestExtractThink:
|
||||
|
||||
def test_no_think_tags(self):
|
||||
thinking, clean = extract_think("Hello World")
|
||||
assert thinking is None
|
||||
assert clean == "Hello World"
|
||||
|
||||
def test_single_think_block(self):
|
||||
text = "Hello <think>reasoning content\nhere</think> World"
|
||||
thinking, clean = extract_think(text)
|
||||
assert thinking == "reasoning content\nhere"
|
||||
assert clean == "Hello World"
|
||||
|
||||
def test_single_thought_block(self):
|
||||
text = "Hello <thought>reasoning content</thought> World"
|
||||
thinking, clean = extract_think(text)
|
||||
assert thinking == "reasoning content"
|
||||
assert clean == "Hello World"
|
||||
|
||||
def test_multiple_think_blocks(self):
|
||||
text = "A<think>first</think>B<thought>second</thought>C"
|
||||
thinking, clean = extract_think(text)
|
||||
assert thinking == "first\n\nsecond"
|
||||
assert clean == "ABC"
|
||||
|
||||
def test_think_only_no_content(self):
|
||||
text = "<think>just thinking</think>"
|
||||
thinking, clean = extract_think(text)
|
||||
assert thinking == "just thinking"
|
||||
assert clean == ""
|
||||
|
||||
def test_unclosed_think_not_extracted(self):
|
||||
# Unclosed blocks at start are stripped but NOT extracted
|
||||
text = "<think>unclosed thinking..."
|
||||
thinking, clean = extract_think(text)
|
||||
assert thinking is None
|
||||
assert clean == ""
|
||||
|
||||
def test_empty_think_block(self):
|
||||
text = "Hello <think></think> World"
|
||||
thinking, clean = extract_think(text)
|
||||
# Empty blocks result in empty string after strip
|
||||
assert thinking == ""
|
||||
assert clean == "Hello World"
|
||||
|
||||
def test_think_with_whitespace_only(self):
|
||||
text = "Hello <think> \n World"
|
||||
thinking, clean = extract_think(text)
|
||||
assert thinking is None
|
||||
assert clean == "Hello <think> \n World"
|
||||
|
||||
def test_mixed_think_and_thought(self):
|
||||
text = "Start<think>first reasoning</think>middle<thought>second reasoning</thought>End"
|
||||
thinking, clean = extract_think(text)
|
||||
assert thinking == "first reasoning\n\nsecond reasoning"
|
||||
assert clean == "StartmiddleEnd"
|
||||
|
||||
def test_real_world_ollama_response(self):
|
||||
text = """<think>
|
||||
The user is asking about Python list comprehensions.
|
||||
Let me explain the syntax and give examples.
|
||||
</think>
|
||||
|
||||
List comprehensions in Python provide a concise way to create lists. Here's the syntax:
|
||||
|
||||
```python
|
||||
[expression for item in iterable if condition]
|
||||
```
|
||||
|
||||
For example:
|
||||
```python
|
||||
squares = [x**2 for x in range(10)]
|
||||
```"""
|
||||
thinking, clean = extract_think(text)
|
||||
assert "list comprehensions" in thinking.lower()
|
||||
assert "Let me explain" in thinking
|
||||
assert "List comprehensions in Python" in clean
|
||||
assert "<think>" not in clean
|
||||
assert "</think>" not in clean
|
||||
|
||||
|
||||
class TestExtractReasoning:
|
||||
"""Single source of truth for reasoning extraction across all providers."""
|
||||
|
||||
def test_prefers_reasoning_content_and_strips_inline_think(self):
|
||||
# Dedicated field wins; inline tags are still scrubbed from content.
|
||||
reasoning, content = extract_reasoning(
|
||||
"dedicated",
|
||||
None,
|
||||
"<think>inline</think>visible answer",
|
||||
)
|
||||
assert reasoning == "dedicated"
|
||||
assert content == "visible answer"
|
||||
|
||||
def test_falls_back_to_thinking_blocks(self):
|
||||
reasoning, content = extract_reasoning(
|
||||
None,
|
||||
[
|
||||
{"type": "thinking", "thinking": "step 1"},
|
||||
{"type": "thinking", "thinking": "step 2"},
|
||||
{"type": "redacted_thinking"},
|
||||
],
|
||||
"hello",
|
||||
)
|
||||
assert reasoning == "step 1\n\nstep 2"
|
||||
assert content == "hello"
|
||||
|
||||
def test_falls_back_to_inline_think_tags(self):
|
||||
reasoning, content = extract_reasoning(
|
||||
None, None, "<think>plan</think>answer"
|
||||
)
|
||||
assert reasoning == "plan"
|
||||
assert content == "answer"
|
||||
|
||||
def test_no_reasoning_returns_none(self):
|
||||
reasoning, content = extract_reasoning(None, None, "plain answer")
|
||||
assert reasoning is None
|
||||
assert content == "plain answer"
|
||||
|
||||
def test_empty_thinking_blocks_falls_through_to_inline(self):
|
||||
reasoning, content = extract_reasoning(
|
||||
None, [], "<think>plan</think>answer"
|
||||
)
|
||||
assert reasoning == "plan"
|
||||
assert content == "answer"
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
import { useCallback, useEffect, useRef, useState } from "react";
|
||||
import { Check, ChevronRight, Copy, FileIcon, ImageIcon, PlaySquare, Wrench } from "lucide-react";
|
||||
import { Check, ChevronRight, Copy, FileIcon, ImageIcon, PlaySquare, Sparkles, Wrench } from "lucide-react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
|
||||
import { ImageLightbox } from "@/components/ImageLightbox";
|
||||
@ -85,12 +85,18 @@ export function MessageBubble({ message }: MessageBubbleProps) {
|
||||
|
||||
const empty = message.content.trim().length === 0;
|
||||
const media = message.media ?? [];
|
||||
const reasoning = message.role === "assistant" ? message.reasoning ?? "" : "";
|
||||
const reasoningStreaming = !!(message.role === "assistant" && message.reasoningStreaming);
|
||||
const hasReasoning = reasoning.length > 0 || reasoningStreaming;
|
||||
const showAssistantActions = message.role === "assistant" && !message.isStreaming && !empty;
|
||||
return (
|
||||
<div className={cn("w-full text-[15px]", baseAnim)} style={{ lineHeight: "var(--cjk-line-height)" }}>
|
||||
{empty && message.isStreaming ? (
|
||||
{hasReasoning ? (
|
||||
<ReasoningBubble text={reasoning} streaming={reasoningStreaming} hasBodyBelow={!empty} />
|
||||
) : null}
|
||||
{empty && message.isStreaming && !hasReasoning ? (
|
||||
<TypingDots />
|
||||
) : (
|
||||
) : empty && message.isStreaming ? null : (
|
||||
<>
|
||||
<MarkdownText>{message.content}</MarkdownText>
|
||||
{message.isStreaming && <StreamCursor />}
|
||||
@ -380,14 +386,14 @@ interface TraceGroupProps {
|
||||
|
||||
/**
|
||||
* Collapsible group of tool-call / progress breadcrumbs. Defaults to
|
||||
* expanded for discoverability; a single click on the header folds the
|
||||
* group down to a one-line summary so it never dominates the thread.
|
||||
* collapsed because tool traces are supporting evidence, not the answer.
|
||||
* A single click expands the exact calls when the user wants details.
|
||||
*/
|
||||
function TraceGroup({ message, animClass }: TraceGroupProps) {
|
||||
const { t } = useTranslation();
|
||||
const lines = message.traces ?? [message.content];
|
||||
const count = lines.length;
|
||||
const [open, setOpen] = useState(true);
|
||||
const [open, setOpen] = useState(false);
|
||||
return (
|
||||
<div className={cn("w-full", animClass)}>
|
||||
<button
|
||||
@ -433,3 +439,79 @@ function TraceGroup({ message, animClass }: TraceGroupProps) {
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
interface ReasoningBubbleProps {
|
||||
text: string;
|
||||
streaming: boolean;
|
||||
hasBodyBelow: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Subordinate "thinking" trace shown above an assistant turn.
|
||||
*
|
||||
* Lifecycle:
|
||||
* - While ``streaming`` is true (``reasoning_delta`` frames still arriving),
|
||||
* the bubble defaults to open and the header runs a shimmer + pulse so
|
||||
* the user sees the model "thinking out loud" in real time.
|
||||
* - On ``reasoning_end`` the bubble auto-collapses for prose density —
|
||||
* the user can re-expand to inspect the chain of thought. The local
|
||||
* toggle persists once the user interacts.
|
||||
*/
|
||||
function ReasoningBubble({ text, streaming, hasBodyBelow }: ReasoningBubbleProps) {
|
||||
const { t } = useTranslation();
|
||||
const [userToggled, setUserToggled] = useState(false);
|
||||
const [openLocal, setOpenLocal] = useState(true);
|
||||
const open = userToggled ? openLocal : streaming;
|
||||
const onToggle = () => {
|
||||
setUserToggled(true);
|
||||
setOpenLocal((v) => (userToggled ? !v : !open));
|
||||
};
|
||||
return (
|
||||
<div
|
||||
className={cn(
|
||||
"w-full animate-in fade-in-0 slide-in-from-top-1 duration-200",
|
||||
hasBodyBelow && "mb-2",
|
||||
)}
|
||||
>
|
||||
<button
|
||||
type="button"
|
||||
onClick={onToggle}
|
||||
className={cn(
|
||||
"group flex w-full items-center gap-2 rounded-md px-2 py-1.5",
|
||||
"text-xs text-muted-foreground transition-colors hover:bg-muted/45",
|
||||
streaming && "reasoning-shimmer",
|
||||
)}
|
||||
aria-expanded={open}
|
||||
aria-live={streaming ? "polite" : undefined}
|
||||
>
|
||||
<Sparkles
|
||||
className={cn("h-3.5 w-3.5", streaming && "animate-pulse")}
|
||||
aria-hidden
|
||||
/>
|
||||
<span className="font-medium">
|
||||
{streaming
|
||||
? t("message.reasoningStreaming", { defaultValue: "Thinking…" })
|
||||
: t("message.reasoning", { defaultValue: "Thinking" })}
|
||||
</span>
|
||||
<ChevronRight
|
||||
aria-hidden
|
||||
className={cn(
|
||||
"ml-auto h-3.5 w-3.5 transition-transform duration-200",
|
||||
open && "rotate-90",
|
||||
)}
|
||||
/>
|
||||
</button>
|
||||
{open && text.length > 0 && (
|
||||
<div
|
||||
className={cn(
|
||||
"mt-1 space-y-0.5 whitespace-pre-wrap break-words border-l border-muted-foreground/20 pl-3",
|
||||
"animate-in fade-in-0 slide-in-from-top-1 duration-200",
|
||||
"text-[12.5px] italic leading-relaxed text-muted-foreground/85",
|
||||
)}
|
||||
>
|
||||
{text}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
import {
|
||||
useCallback,
|
||||
useEffect,
|
||||
useLayoutEffect,
|
||||
useMemo,
|
||||
useRef,
|
||||
useState,
|
||||
@ -77,6 +78,17 @@ const COMMAND_ICONS: Record<string, LucideIcon> = {
|
||||
type ImageAspectRatio = "auto" | "1:1" | "3:4" | "9:16" | "4:3" | "16:9";
|
||||
|
||||
const IMAGE_ASPECT_RATIOS: ImageAspectRatio[] = ["auto", "1:1", "3:4", "9:16", "4:3", "16:9"];
|
||||
const SLASH_PALETTE_GAP_PX = 8;
|
||||
const SLASH_PALETTE_MAX_HEIGHT_PX = 288;
|
||||
const SLASH_PALETTE_MIN_HEIGHT_PX = 144;
|
||||
const SLASH_PALETTE_CHROME_PX = 64;
|
||||
|
||||
type SlashPalettePlacement = "above" | "below";
|
||||
|
||||
interface SlashPaletteLayout {
|
||||
placement: SlashPalettePlacement;
|
||||
maxHeight: number;
|
||||
}
|
||||
|
||||
function slashCommandI18nKey(command: string): string {
|
||||
return command.replace(/^\//, "").replace(/-/g, "_");
|
||||
@ -96,6 +108,24 @@ function scrollNearestOverflowParent(target: EventTarget | null, deltaY: number)
|
||||
}
|
||||
}
|
||||
|
||||
function getVisibleBounds(el: HTMLElement): { top: number; bottom: number } {
|
||||
let top = 0;
|
||||
let bottom = window.innerHeight;
|
||||
let parent = el.parentElement;
|
||||
|
||||
while (parent) {
|
||||
const style = window.getComputedStyle(parent);
|
||||
if (/(auto|scroll|hidden|clip)/.test(style.overflowY)) {
|
||||
const rect = parent.getBoundingClientRect();
|
||||
top = Math.max(top, rect.top);
|
||||
bottom = Math.min(bottom, rect.bottom);
|
||||
}
|
||||
parent = parent.parentElement;
|
||||
}
|
||||
|
||||
return { top, bottom };
|
||||
}
|
||||
|
||||
export function ThreadComposer({
|
||||
onSend,
|
||||
disabled,
|
||||
@ -117,6 +147,7 @@ export function ThreadComposer({
|
||||
const [imageAspectRatio, setImageAspectRatio] = useState<ImageAspectRatio>("auto");
|
||||
const [aspectMenuOpen, setAspectMenuOpen] = useState(false);
|
||||
const textareaRef = useRef<HTMLTextAreaElement>(null);
|
||||
const formRef = useRef<HTMLFormElement>(null);
|
||||
const fileInputRef = useRef<HTMLInputElement>(null);
|
||||
const aspectControlRef = useRef<HTMLDivElement>(null);
|
||||
const chipRefs = useRef(new Map<string, HTMLButtonElement>());
|
||||
@ -221,6 +252,10 @@ export function ThreadComposer({
|
||||
}, [slashCommands, slashQuery, t]);
|
||||
|
||||
const showSlashMenu = filteredSlashCommands.length > 0;
|
||||
const [slashPaletteLayout, setSlashPaletteLayout] = useState<SlashPaletteLayout>({
|
||||
placement: "above",
|
||||
maxHeight: SLASH_PALETTE_MAX_HEIGHT_PX,
|
||||
});
|
||||
|
||||
useEffect(() => {
|
||||
setSelectedCommandIndex(0);
|
||||
@ -232,6 +267,56 @@ export function ThreadComposer({
|
||||
}
|
||||
}, [filteredSlashCommands.length, selectedCommandIndex]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!showSlashMenu) return;
|
||||
|
||||
const dismissOnPointerDown = (event: PointerEvent) => {
|
||||
const target = event.target;
|
||||
if (target instanceof Node && formRef.current?.contains(target)) return;
|
||||
setSlashMenuDismissed(true);
|
||||
};
|
||||
|
||||
document.addEventListener("pointerdown", dismissOnPointerDown, true);
|
||||
return () => {
|
||||
document.removeEventListener("pointerdown", dismissOnPointerDown, true);
|
||||
};
|
||||
}, [showSlashMenu]);
|
||||
|
||||
useLayoutEffect(() => {
|
||||
if (!showSlashMenu) return;
|
||||
|
||||
const updateLayout = () => {
|
||||
const form = formRef.current;
|
||||
if (!form) return;
|
||||
const rect = form.getBoundingClientRect();
|
||||
if (rect.width === 0 && rect.height === 0) return;
|
||||
|
||||
const bounds = getVisibleBounds(form);
|
||||
const spaceAbove = Math.max(0, rect.top - bounds.top - SLASH_PALETTE_GAP_PX);
|
||||
const spaceBelow = Math.max(0, bounds.bottom - rect.bottom - SLASH_PALETTE_GAP_PX);
|
||||
const placement: SlashPalettePlacement =
|
||||
spaceAbove >= SLASH_PALETTE_MIN_HEIGHT_PX || spaceAbove >= spaceBelow
|
||||
? "above"
|
||||
: "below";
|
||||
const available = placement === "above" ? spaceAbove : spaceBelow;
|
||||
const maxHeight = Math.min(SLASH_PALETTE_MAX_HEIGHT_PX, available);
|
||||
|
||||
setSlashPaletteLayout((current) =>
|
||||
current.placement === placement && current.maxHeight === maxHeight
|
||||
? current
|
||||
: { placement, maxHeight },
|
||||
);
|
||||
};
|
||||
|
||||
updateLayout();
|
||||
window.addEventListener("resize", updateLayout);
|
||||
document.addEventListener("scroll", updateLayout, true);
|
||||
return () => {
|
||||
window.removeEventListener("resize", updateLayout);
|
||||
document.removeEventListener("scroll", updateLayout, true);
|
||||
};
|
||||
}, [filteredSlashCommands.length, showSlashMenu]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!aspectMenuOpen) return;
|
||||
|
||||
@ -398,6 +483,7 @@ export function ThreadComposer({
|
||||
|
||||
return (
|
||||
<form
|
||||
ref={formRef}
|
||||
onSubmit={(e) => {
|
||||
e.preventDefault();
|
||||
submit();
|
||||
@ -412,6 +498,7 @@ export function ThreadComposer({
|
||||
<SlashCommandPalette
|
||||
commands={filteredSlashCommands}
|
||||
selectedIndex={selectedCommandIndex}
|
||||
layout={slashPaletteLayout}
|
||||
isHero={isHero}
|
||||
onHover={setSelectedCommandIndex}
|
||||
onChoose={chooseSlashCommand}
|
||||
@ -634,6 +721,7 @@ export function ThreadComposer({
|
||||
interface SlashCommandPaletteProps {
|
||||
commands: SlashCommand[];
|
||||
selectedIndex: number;
|
||||
layout: SlashPaletteLayout;
|
||||
isHero: boolean;
|
||||
onHover: (index: number) => void;
|
||||
onChoose: (command: SlashCommand) => void;
|
||||
@ -695,17 +783,24 @@ function ImageAspectMenu({
|
||||
function SlashCommandPalette({
|
||||
commands,
|
||||
selectedIndex,
|
||||
layout,
|
||||
isHero,
|
||||
onHover,
|
||||
onChoose,
|
||||
}: SlashCommandPaletteProps) {
|
||||
const { t } = useTranslation();
|
||||
const listMaxHeight = Math.max(
|
||||
0,
|
||||
layout.maxHeight - SLASH_PALETTE_CHROME_PX,
|
||||
);
|
||||
return (
|
||||
<div
|
||||
role="listbox"
|
||||
aria-label={t("thread.composer.slash.ariaLabel")}
|
||||
style={{ maxHeight: layout.maxHeight }}
|
||||
className={cn(
|
||||
"absolute bottom-full left-1/2 z-30 mb-2 max-h-[22rem] w-[calc(100%-0.5rem)] -translate-x-1/2 overflow-hidden rounded-[18px] border",
|
||||
"absolute left-1/2 z-30 w-[calc(100%-0.5rem)] -translate-x-1/2 overflow-hidden rounded-[18px] border",
|
||||
layout.placement === "above" ? "bottom-full mb-2" : "top-full mt-2",
|
||||
"border-border/65 bg-popover p-1.5 text-popover-foreground shadow-[0_18px_55px_rgba(15,23,42,0.18)]",
|
||||
"dark:border-white/10 dark:shadow-[0_22px_55px_rgba(0,0,0,0.45)]",
|
||||
isHero ? "max-w-[58rem]" : "max-w-[49.5rem]",
|
||||
@ -714,7 +809,7 @@ function SlashCommandPalette({
|
||||
<div className="px-2 pb-1 pt-1 text-[11px] font-medium tracking-[0.08em] text-muted-foreground/70">
|
||||
{t("thread.composer.slash.label")}
|
||||
</div>
|
||||
<div className="max-h-[18rem] overflow-y-auto pr-0.5">
|
||||
<div className="overflow-y-auto pr-0.5" style={{ maxHeight: listMaxHeight }}>
|
||||
{commands.map((command, index) => {
|
||||
const Icon = COMMAND_ICONS[command.icon] ?? CircleHelp;
|
||||
const selected = index === selectedIndex;
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
import { MessageBubble } from "@/components/MessageBubble";
|
||||
import { cn } from "@/lib/utils";
|
||||
import type { UIMessage } from "@/lib/types";
|
||||
|
||||
interface ThreadMessagesProps {
|
||||
@ -7,10 +8,30 @@ interface ThreadMessagesProps {
|
||||
|
||||
export function ThreadMessages({ messages }: ThreadMessagesProps) {
|
||||
return (
|
||||
<div className="flex w-full flex-col gap-5">
|
||||
{messages.map((message) => (
|
||||
<MessageBubble key={message.id} message={message} />
|
||||
))}
|
||||
<div className="flex w-full flex-col">
|
||||
{messages.map((message, index) => {
|
||||
const prev = messages[index - 1];
|
||||
const compact = isAuxiliaryRow(message) && prev && isAuxiliaryRow(prev);
|
||||
return (
|
||||
<div
|
||||
key={message.id}
|
||||
className={cn(index > 0 && (compact ? "mt-2" : "mt-5"))}
|
||||
>
|
||||
<MessageBubble message={message} />
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function isAuxiliaryRow(message: UIMessage): boolean {
|
||||
return (
|
||||
message.kind === "trace"
|
||||
|| (
|
||||
message.role === "assistant"
|
||||
&& message.content.trim().length === 0
|
||||
&& (!!message.reasoning || !!message.reasoningStreaming)
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@ -260,6 +260,7 @@ export function ThreadShell({
|
||||
}
|
||||
modelLabel={toModelBadgeLabel(modelName)}
|
||||
variant="hero"
|
||||
slashCommands={slashCommands}
|
||||
imageMode={heroImageMode}
|
||||
onImageModeChange={setHeroImageMode}
|
||||
/>
|
||||
|
||||
@ -117,6 +117,34 @@
|
||||
--cjk-line-height: 1.625;
|
||||
}
|
||||
|
||||
/* Shimmer band sweeping across the reasoning header while
|
||||
``reasoning_delta`` frames are arriving. Pure CSS, no JS animation,
|
||||
respects ``prefers-reduced-motion``. */
|
||||
@keyframes reasoning-shimmer-sweep {
|
||||
0% {
|
||||
background-position: -200% 0;
|
||||
}
|
||||
100% {
|
||||
background-position: 200% 0;
|
||||
}
|
||||
}
|
||||
.reasoning-shimmer {
|
||||
background-image: linear-gradient(
|
||||
90deg,
|
||||
transparent 0%,
|
||||
hsl(var(--muted-foreground) / 0.18) 50%,
|
||||
transparent 100%
|
||||
);
|
||||
background-size: 200% 100%;
|
||||
background-repeat: no-repeat;
|
||||
animation: reasoning-shimmer-sweep 2.2s linear infinite;
|
||||
}
|
||||
@media (prefers-reduced-motion: reduce) {
|
||||
.reasoning-shimmer {
|
||||
animation: none;
|
||||
}
|
||||
}
|
||||
|
||||
/* Subtle scrollbar that doesn't fight the dark background. */
|
||||
.scrollbar-thin {
|
||||
scrollbar-width: thin;
|
||||
|
||||
@ -18,6 +18,95 @@ interface StreamBuffer {
|
||||
parts: string[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Append a reasoning chunk to the last open reasoning stream in ``prev``.
|
||||
*
|
||||
* Lookup rule: prefer the most recent assistant turn in the active UI tail.
|
||||
* Most providers emit reasoning before answer text, but some only expose
|
||||
* ``reasoning_content`` after the answer stream completes. In that post-hoc
|
||||
* case the reasoning still belongs to the same assistant turn and must render
|
||||
* above the answer, not as a new row below it.
|
||||
*/
|
||||
function attachReasoningChunk(prev: UIMessage[], chunk: string): UIMessage[] {
|
||||
for (let i = prev.length - 1; i >= 0; i -= 1) {
|
||||
const candidate = prev[i];
|
||||
// A user turn is a hard boundary: reasoning after it belongs to the new
|
||||
// assistant turn, never to an earlier assistant reply.
|
||||
if (candidate.role === "user") break;
|
||||
// A trace row (e.g. Used tools) is also a phase boundary. Reasoning after
|
||||
// tools belongs to the next assistant iteration, not the assistant turn
|
||||
// that produced those tool calls.
|
||||
if (candidate.kind === "trace") break;
|
||||
if (candidate.role !== "assistant") continue;
|
||||
const hasAnswer = candidate.content.length > 0;
|
||||
if (
|
||||
candidate.reasoningStreaming
|
||||
|| candidate.reasoning !== undefined
|
||||
|| hasAnswer
|
||||
|| candidate.isStreaming
|
||||
) {
|
||||
const merged: UIMessage = {
|
||||
...candidate,
|
||||
reasoning: (candidate.reasoning ?? "") + chunk,
|
||||
reasoningStreaming: true,
|
||||
};
|
||||
return [...prev.slice(0, i), merged, ...prev.slice(i + 1)];
|
||||
}
|
||||
if (!hasAnswer && candidate.isStreaming) {
|
||||
const merged: UIMessage = {
|
||||
...candidate,
|
||||
reasoning: chunk,
|
||||
reasoningStreaming: true,
|
||||
};
|
||||
return [...prev.slice(0, i), merged, ...prev.slice(i + 1)];
|
||||
}
|
||||
break;
|
||||
}
|
||||
return [
|
||||
...prev,
|
||||
{
|
||||
id: crypto.randomUUID(),
|
||||
role: "assistant",
|
||||
content: "",
|
||||
isStreaming: true,
|
||||
reasoning: chunk,
|
||||
reasoningStreaming: true,
|
||||
createdAt: Date.now(),
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the most recent assistant placeholder that an incoming answer
|
||||
* delta should adopt instead of spawning a parallel row. We look for an
|
||||
* empty-content assistant turn that is still marked ``isStreaming`` —
|
||||
* typically created earlier by ``reasoning_delta``. Anything else means
|
||||
* the model already produced an answer in a previous turn, so the new
|
||||
* delta belongs in a fresh row.
|
||||
*/
|
||||
function findActiveAssistantPlaceholder(prev: UIMessage[]): string | null {
|
||||
const last = prev[prev.length - 1];
|
||||
if (!last) return null;
|
||||
if (last.role !== "assistant" || last.kind === "trace") return null;
|
||||
if (last.content.length > 0) return null;
|
||||
if (!last.isStreaming) return null;
|
||||
return last.id;
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the active reasoning stream segment, if any. Idempotent: a
|
||||
* ``reasoning_end`` with no preceding deltas is a harmless no-op.
|
||||
*/
|
||||
function closeReasoningStream(prev: UIMessage[]): UIMessage[] {
|
||||
for (let i = prev.length - 1; i >= 0; i -= 1) {
|
||||
const candidate = prev[i];
|
||||
if (!candidate.reasoningStreaming) continue;
|
||||
const merged: UIMessage = { ...candidate, reasoningStreaming: false };
|
||||
return [...prev.slice(0, i), merged, ...prev.slice(i + 1)];
|
||||
}
|
||||
return prev;
|
||||
}
|
||||
|
||||
/**
|
||||
* Subscribe to a chat by ID. Returns the in-memory message list for the chat,
|
||||
* a streaming flag, and a ``send`` function. Initial history must be seeded
|
||||
@ -122,27 +211,42 @@ export function useNanobotStream(
|
||||
|
||||
if (ev.event === "delta") {
|
||||
if (suppressStreamUntilTurnEndRef.current) return;
|
||||
const id = buffer.current?.messageId ?? crypto.randomUUID();
|
||||
if (!buffer.current) {
|
||||
buffer.current = { messageId: id, parts: [] };
|
||||
setMessages((prev) => [
|
||||
...prev,
|
||||
{
|
||||
id,
|
||||
role: "assistant",
|
||||
content: "",
|
||||
isStreaming: true,
|
||||
createdAt: Date.now(),
|
||||
},
|
||||
]);
|
||||
setIsStreaming(true);
|
||||
}
|
||||
buffer.current.parts.push(ev.text);
|
||||
const combined = buffer.current.parts.join("");
|
||||
const targetId = buffer.current.messageId;
|
||||
setMessages((prev) =>
|
||||
prev.map((m) => (m.id === targetId ? { ...m, content: combined } : m)),
|
||||
);
|
||||
const chunk = ev.text;
|
||||
setIsStreaming(true);
|
||||
setMessages((prev) => {
|
||||
// Reuse an in-flight assistant placeholder (typically created by
|
||||
// ``reasoning_delta``) so the answer renders below its own
|
||||
// thinking trace instead of in a parallel row.
|
||||
const adopted = !buffer.current ? findActiveAssistantPlaceholder(prev) : null;
|
||||
let targetId: string;
|
||||
let next: UIMessage[];
|
||||
if (buffer.current) {
|
||||
targetId = buffer.current.messageId;
|
||||
next = prev;
|
||||
} else if (adopted) {
|
||||
targetId = adopted;
|
||||
buffer.current = { messageId: targetId, parts: [] };
|
||||
next = prev;
|
||||
} else {
|
||||
targetId = crypto.randomUUID();
|
||||
buffer.current = { messageId: targetId, parts: [] };
|
||||
next = [
|
||||
...prev,
|
||||
{
|
||||
id: targetId,
|
||||
role: "assistant",
|
||||
content: "",
|
||||
isStreaming: true,
|
||||
createdAt: Date.now(),
|
||||
},
|
||||
];
|
||||
}
|
||||
buffer.current.parts.push(chunk);
|
||||
const combined = buffer.current.parts.join("");
|
||||
return next.map((m) =>
|
||||
m.id === targetId ? { ...m, content: combined, isStreaming: true } : m,
|
||||
);
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
@ -159,6 +263,21 @@ export function useNanobotStream(
|
||||
return;
|
||||
}
|
||||
|
||||
if (ev.event === "reasoning_delta") {
|
||||
if (suppressStreamUntilTurnEndRef.current) return;
|
||||
const chunk = ev.text;
|
||||
if (!chunk) return;
|
||||
setMessages((prev) => attachReasoningChunk(prev, chunk));
|
||||
setIsStreaming(true);
|
||||
return;
|
||||
}
|
||||
|
||||
if (ev.event === "reasoning_end") {
|
||||
if (suppressStreamUntilTurnEndRef.current) return;
|
||||
setMessages((prev) => closeReasoningStream(prev));
|
||||
return;
|
||||
}
|
||||
|
||||
if (ev.event === "turn_end") {
|
||||
// Definitive signal that the turn is fully complete. Cancel any
|
||||
// pending debounce timer and stop the loading indicator immediately.
|
||||
@ -183,10 +302,19 @@ export function useNanobotStream(
|
||||
if (ev.event === "message") {
|
||||
if (
|
||||
suppressStreamUntilTurnEndRef.current &&
|
||||
(ev.kind === "tool_hint" || ev.kind === "progress")
|
||||
(ev.kind === "tool_hint" || ev.kind === "progress" || ev.kind === "reasoning")
|
||||
) {
|
||||
return;
|
||||
}
|
||||
// Back-compat: a legacy ``kind: "reasoning"`` message (no streaming
|
||||
// partner) is treated as one complete delta + immediate end so the
|
||||
// bubble renders identically to the streaming path.
|
||||
if (ev.kind === "reasoning") {
|
||||
const line = ev.text;
|
||||
if (!line) return;
|
||||
setMessages((prev) => closeReasoningStream(attachReasoningChunk(prev, line)));
|
||||
return;
|
||||
}
|
||||
// Intermediate agent breadcrumbs (tool-call hints, raw progress).
|
||||
// Attach them to the last trace row if it was the last emitted item
|
||||
// so a sequence of calls collapses into one compact trace group.
|
||||
|
||||
@ -14,6 +14,48 @@ import type { ChatSummary, UIMessage } from "@/lib/types";
|
||||
|
||||
const EMPTY_MESSAGES: UIMessage[] = [];
|
||||
|
||||
type HistoryMessage = Awaited<ReturnType<typeof fetchSessionMessages>>["messages"][number];
|
||||
|
||||
function reasoningFromHistory(message: HistoryMessage): string | undefined {
|
||||
if (typeof message.reasoning_content === "string" && message.reasoning_content.trim()) {
|
||||
return message.reasoning_content;
|
||||
}
|
||||
if (!Array.isArray(message.thinking_blocks)) return undefined;
|
||||
const parts = message.thinking_blocks
|
||||
.map((block) => {
|
||||
if (!block || typeof block !== "object") return "";
|
||||
const thinking = (block as { thinking?: unknown }).thinking;
|
||||
return typeof thinking === "string" ? thinking.trim() : "";
|
||||
})
|
||||
.filter(Boolean);
|
||||
return parts.length > 0 ? parts.join("\n\n") : undefined;
|
||||
}
|
||||
|
||||
function formatToolCallTrace(call: unknown): string | null {
|
||||
if (!call || typeof call !== "object") return null;
|
||||
const item = call as {
|
||||
name?: unknown;
|
||||
function?: { name?: unknown; arguments?: unknown };
|
||||
};
|
||||
const name =
|
||||
typeof item.function?.name === "string"
|
||||
? item.function.name
|
||||
: typeof item.name === "string"
|
||||
? item.name
|
||||
: "";
|
||||
if (!name) return null;
|
||||
const args = item.function?.arguments;
|
||||
if (typeof args === "string" && args.trim()) return `${name}(${args})`;
|
||||
return `${name}()`;
|
||||
}
|
||||
|
||||
function toolTracesFromHistory(message: HistoryMessage): string[] {
|
||||
if (!Array.isArray(message.tool_calls)) return [];
|
||||
return message.tool_calls
|
||||
.map(formatToolCallTrace)
|
||||
.filter((trace): trace is string => !!trace);
|
||||
}
|
||||
|
||||
/** Sidebar state: fetches the full session list and exposes create / delete actions. */
|
||||
export function useSessions(): {
|
||||
sessions: ChatSummary[];
|
||||
@ -143,14 +185,28 @@ export function useSessionHistory(key: string | null): {
|
||||
m.role === "user" && media?.every((item) => item.kind === "image")
|
||||
? media.map((item) => ({ url: item.url, name: item.name }))
|
||||
: undefined;
|
||||
const row: UIMessage = {
|
||||
id: `hist-${idx}`,
|
||||
role: m.role,
|
||||
content: m.content,
|
||||
createdAt: m.timestamp ? Date.parse(m.timestamp) : Date.now(),
|
||||
...(images ? { images } : {}),
|
||||
...(media ? { media } : {}),
|
||||
...(m.role === "assistant" && reasoningFromHistory(m)
|
||||
? { reasoning: reasoningFromHistory(m), reasoningStreaming: false }
|
||||
: {}),
|
||||
};
|
||||
const traces = m.role === "assistant" ? toolTracesFromHistory(m) : [];
|
||||
if (traces.length === 0) return [row];
|
||||
return [
|
||||
...(row.content.trim() || row.reasoning || row.media?.length ? [row] : []),
|
||||
{
|
||||
id: `hist-${idx}`,
|
||||
role: m.role,
|
||||
content: m.content,
|
||||
id: `hist-${idx}-tools`,
|
||||
role: "tool" as const,
|
||||
kind: "trace" as const,
|
||||
content: traces[traces.length - 1],
|
||||
traces,
|
||||
createdAt: m.timestamp ? Date.parse(m.timestamp) : Date.now(),
|
||||
...(images ? { images } : {}),
|
||||
...(media ? { media } : {}),
|
||||
},
|
||||
];
|
||||
});
|
||||
|
||||
@ -332,6 +332,8 @@
|
||||
"assistantTyping": "Assistant is typing",
|
||||
"toolSingle": "Using a tool",
|
||||
"toolMany": "Used {{count}} tools",
|
||||
"reasoning": "Thinking",
|
||||
"reasoningStreaming": "Thinking…",
|
||||
"imageAttachment": "Image attachment",
|
||||
"copyReply": "Copy reply",
|
||||
"copiedReply": "Copied reply"
|
||||
|
||||
@ -320,6 +320,8 @@
|
||||
"assistantTyping": "助手正在输入",
|
||||
"toolSingle": "正在使用工具",
|
||||
"toolMany": "已使用 {{count}} 个工具",
|
||||
"reasoning": "思考过程",
|
||||
"reasoningStreaming": "正在思考…",
|
||||
"imageAttachment": "图片附件",
|
||||
"copyReply": "复制回复",
|
||||
"copiedReply": "已复制回复"
|
||||
|
||||
@ -89,6 +89,8 @@ export async function fetchSessionMessages(
|
||||
content: string;
|
||||
timestamp?: string;
|
||||
tool_calls?: unknown;
|
||||
reasoning_content?: string | null;
|
||||
thinking_blocks?: unknown;
|
||||
tool_call_id?: string;
|
||||
name?: string;
|
||||
/** Present on ``user`` turns that attached images. Paths have already
|
||||
|
||||
@ -44,6 +44,13 @@ export interface UIMessage {
|
||||
images?: UIImage[];
|
||||
/** Signed or local UI-renderable media attachments. */
|
||||
media?: UIMediaAttachment[];
|
||||
/** Assistant turn: accumulated model reasoning / thinking text. Built up
|
||||
* incrementally from ``reasoning_delta`` frames; finalized when
|
||||
* ``reasoning_end`` arrives. */
|
||||
reasoning?: string;
|
||||
/** True while ``reasoning_delta`` frames are still arriving for this turn.
|
||||
* Drives the shimmer header on ``ReasoningBubble``. */
|
||||
reasoningStreaming?: boolean;
|
||||
}
|
||||
|
||||
export interface ChatSummary {
|
||||
@ -141,7 +148,7 @@ export type InboundEvent =
|
||||
media_urls?: Array<{ url: string; name?: string }>;
|
||||
/** Present when the frame is an agent breadcrumb (e.g. tool hint,
|
||||
* generic progress line) rather than a conversational reply. */
|
||||
kind?: "tool_hint" | "progress";
|
||||
kind?: "tool_hint" | "progress" | "reasoning";
|
||||
}
|
||||
| {
|
||||
event: "delta";
|
||||
@ -154,6 +161,17 @@ export type InboundEvent =
|
||||
chat_id: string;
|
||||
stream_id?: string;
|
||||
}
|
||||
| {
|
||||
event: "reasoning_delta";
|
||||
chat_id: string;
|
||||
text: string;
|
||||
stream_id?: string;
|
||||
}
|
||||
| {
|
||||
event: "reasoning_end";
|
||||
chat_id: string;
|
||||
stream_id?: string;
|
||||
}
|
||||
| {
|
||||
event: "runtime_model_updated";
|
||||
model_name: string;
|
||||
|
||||
@ -72,11 +72,12 @@ describe("MessageBubble", () => {
|
||||
render(<MessageBubble message={message} />);
|
||||
const toggle = screen.getByRole("button", { name: /used 2 tools/i });
|
||||
|
||||
expect(screen.getByText('weather("get")')).toBeInTheDocument();
|
||||
expect(screen.getByText('search "hk weather"')).toBeInTheDocument();
|
||||
expect(screen.queryByText('weather("get")')).not.toBeInTheDocument();
|
||||
expect(screen.queryByText('search "hk weather"')).not.toBeInTheDocument();
|
||||
|
||||
fireEvent.click(toggle);
|
||||
expect(screen.queryByText('weather("get")')).not.toBeInTheDocument();
|
||||
expect(screen.getByText('weather("get")')).toBeInTheDocument();
|
||||
expect(screen.getByText('search "hk weather"')).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("renders video media as an inline player", () => {
|
||||
@ -103,6 +104,45 @@ describe("MessageBubble", () => {
|
||||
expect(container.querySelector("video[controls]")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("auto-expands the reasoning trace while streaming with a shimmer header", () => {
|
||||
const message: UIMessage = {
|
||||
id: "a-reasoning-streaming",
|
||||
role: "assistant",
|
||||
content: "",
|
||||
createdAt: Date.now(),
|
||||
reasoning: "Step 1: parse intent. Step 2: compute.",
|
||||
reasoningStreaming: true,
|
||||
};
|
||||
|
||||
const { container } = render(<MessageBubble message={message} />);
|
||||
|
||||
expect(screen.getByText("Thinking…")).toBeInTheDocument();
|
||||
expect(screen.getByText(/Step 1: parse intent\./)).toBeInTheDocument();
|
||||
expect(container.querySelector(".reasoning-shimmer")).toBeInTheDocument();
|
||||
expect(screen.getByRole("button", { name: /thinking/i }).parentElement).not.toHaveClass("mb-2");
|
||||
});
|
||||
|
||||
it("collapses the reasoning section by default once streaming ends", () => {
|
||||
const message: UIMessage = {
|
||||
id: "a-reasoning-done",
|
||||
role: "assistant",
|
||||
content: "The answer is 42.",
|
||||
createdAt: Date.now(),
|
||||
reasoning: "hidden until expanded",
|
||||
reasoningStreaming: false,
|
||||
};
|
||||
|
||||
render(<MessageBubble message={message} />);
|
||||
|
||||
expect(screen.getByText("Thinking")).toBeInTheDocument();
|
||||
expect(screen.getByText("The answer is 42.")).toBeInTheDocument();
|
||||
expect(screen.queryByText("hidden until expanded")).not.toBeInTheDocument();
|
||||
expect(screen.getByRole("button", { name: /thinking/i }).parentElement).toHaveClass("mb-2");
|
||||
|
||||
fireEvent.click(screen.getByRole("button", { name: /thinking/i }));
|
||||
expect(screen.getByText("hidden until expanded")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("renders assistant image media as a larger generated result", () => {
|
||||
const message: UIMessage = {
|
||||
id: "a-image",
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
import { fireEvent, render, screen } from "@testing-library/react";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { fireEvent, render, screen, waitFor } from "@testing-library/react";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
import { ThreadComposer } from "@/components/thread/ThreadComposer";
|
||||
import type { SlashCommand } from "@/lib/types";
|
||||
@ -19,6 +19,33 @@ const COMMANDS: SlashCommand[] = [
|
||||
argHint: "[n]",
|
||||
},
|
||||
];
|
||||
const ORIGINAL_INNER_HEIGHT = window.innerHeight;
|
||||
|
||||
afterEach(() => {
|
||||
vi.restoreAllMocks();
|
||||
Object.defineProperty(window, "innerHeight", {
|
||||
value: ORIGINAL_INNER_HEIGHT,
|
||||
configurable: true,
|
||||
});
|
||||
});
|
||||
|
||||
function rect(init: Partial<DOMRect>): DOMRect {
|
||||
const top = init.top ?? 0;
|
||||
const left = init.left ?? 0;
|
||||
const width = init.width ?? 0;
|
||||
const height = init.height ?? 0;
|
||||
return {
|
||||
x: init.x ?? left,
|
||||
y: init.y ?? top,
|
||||
top,
|
||||
left,
|
||||
width,
|
||||
height,
|
||||
right: init.right ?? left + width,
|
||||
bottom: init.bottom ?? top + height,
|
||||
toJSON: () => ({}),
|
||||
};
|
||||
}
|
||||
|
||||
describe("ThreadComposer", () => {
|
||||
it("renders a readonly hero model composer when provided", () => {
|
||||
@ -74,7 +101,9 @@ describe("ThreadComposer", () => {
|
||||
const input = screen.getByLabelText("Message input");
|
||||
fireEvent.change(input, { target: { value: "/" } });
|
||||
|
||||
expect(screen.getByRole("listbox", { name: "Slash commands" })).toBeInTheDocument();
|
||||
const palette = screen.getByRole("listbox", { name: "Slash commands" });
|
||||
expect(palette).toBeInTheDocument();
|
||||
expect(palette).toHaveStyle({ maxHeight: "288px" });
|
||||
expect(screen.getByRole("option", { name: /\/stop/i })).toHaveAttribute(
|
||||
"aria-selected",
|
||||
"true",
|
||||
@ -92,6 +121,55 @@ describe("ThreadComposer", () => {
|
||||
expect(screen.queryByRole("listbox", { name: "Slash commands" })).not.toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("opens the slash command palette downward when there is more room below", async () => {
|
||||
vi.spyOn(HTMLFormElement.prototype, "getBoundingClientRect").mockReturnValue(
|
||||
rect({ top: 40, bottom: 160, width: 800, height: 120 }),
|
||||
);
|
||||
Object.defineProperty(window, "innerHeight", {
|
||||
value: 330,
|
||||
configurable: true,
|
||||
});
|
||||
render(
|
||||
<ThreadComposer
|
||||
onSend={vi.fn()}
|
||||
placeholder="Ask anything..."
|
||||
slashCommands={COMMANDS}
|
||||
variant="hero"
|
||||
/>,
|
||||
);
|
||||
const input = screen.getByLabelText("Message input");
|
||||
|
||||
fireEvent.change(input, { target: { value: "/" } });
|
||||
|
||||
await waitFor(() => {
|
||||
const palette = screen.getByRole("listbox", { name: "Slash commands" });
|
||||
expect(palette.className).toContain("top-full");
|
||||
expect(palette).toHaveStyle({ maxHeight: "162px" });
|
||||
});
|
||||
});
|
||||
|
||||
it("dismisses the slash command palette on outside click", () => {
|
||||
render(
|
||||
<div>
|
||||
<button type="button">outside</button>
|
||||
<ThreadComposer
|
||||
onSend={vi.fn()}
|
||||
placeholder="Type your message..."
|
||||
slashCommands={COMMANDS}
|
||||
/>
|
||||
</div>,
|
||||
);
|
||||
|
||||
fireEvent.change(screen.getByLabelText("Message input"), {
|
||||
target: { value: "/" },
|
||||
});
|
||||
expect(screen.getByRole("listbox", { name: "Slash commands" })).toBeInTheDocument();
|
||||
|
||||
fireEvent.pointerDown(screen.getByRole("button", { name: "outside" }));
|
||||
|
||||
expect(screen.queryByRole("listbox", { name: "Slash commands" })).not.toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("sends image generation mode with automatic aspect ratio", () => {
|
||||
const onSend = vi.fn();
|
||||
render(
|
||||
|
||||
52
webui/src/tests/thread-messages.test.tsx
Normal file
52
webui/src/tests/thread-messages.test.tsx
Normal file
@ -0,0 +1,52 @@
|
||||
import { render } from "@testing-library/react";
|
||||
import { describe, expect, it } from "vitest";
|
||||
|
||||
import { ThreadMessages } from "@/components/thread/ThreadMessages";
|
||||
import type { UIMessage } from "@/lib/types";
|
||||
|
||||
describe("ThreadMessages", () => {
|
||||
it("uses compact spacing between consecutive auxiliary rows", () => {
|
||||
const messages: UIMessage[] = [
|
||||
{
|
||||
id: "r1",
|
||||
role: "assistant",
|
||||
content: "",
|
||||
reasoning: "thinking",
|
||||
reasoningStreaming: false,
|
||||
isStreaming: true,
|
||||
createdAt: Date.now(),
|
||||
},
|
||||
{
|
||||
id: "t1",
|
||||
role: "tool",
|
||||
kind: "trace",
|
||||
content: "search()",
|
||||
traces: ["search()"],
|
||||
createdAt: Date.now(),
|
||||
},
|
||||
{
|
||||
id: "r2",
|
||||
role: "assistant",
|
||||
content: "",
|
||||
reasoning: "more thinking",
|
||||
reasoningStreaming: false,
|
||||
isStreaming: true,
|
||||
createdAt: Date.now(),
|
||||
},
|
||||
{
|
||||
id: "a1",
|
||||
role: "assistant",
|
||||
content: "final answer",
|
||||
createdAt: Date.now(),
|
||||
},
|
||||
];
|
||||
|
||||
const { container } = render(<ThreadMessages messages={messages} />);
|
||||
const rows = Array.from(container.firstElementChild?.children ?? []);
|
||||
|
||||
expect(rows[0]).not.toHaveClass("mt-2", "mt-5");
|
||||
expect(rows[1]).toHaveClass("mt-2");
|
||||
expect(rows[2]).toHaveClass("mt-2");
|
||||
expect(rows[3]).toHaveClass("mt-5");
|
||||
});
|
||||
});
|
||||
@ -573,7 +573,7 @@ describe("ThreadShell", () => {
|
||||
await waitFor(() => expect(screen.getByText("live assistant reply")).toBeInTheDocument());
|
||||
});
|
||||
|
||||
it("does not open slash commands on the blank welcome page", async () => {
|
||||
it("opens slash commands on the blank welcome page", async () => {
|
||||
const client = makeClient();
|
||||
vi.stubGlobal(
|
||||
"fetch",
|
||||
@ -583,10 +583,11 @@ describe("ThreadShell", () => {
|
||||
return httpJson({
|
||||
commands: [
|
||||
{
|
||||
command: "/stop",
|
||||
title: "Stop current task",
|
||||
description: "Cancel the active agent turn.",
|
||||
icon: "square",
|
||||
command: "/history",
|
||||
title: "Show conversation history",
|
||||
description: "Print the last N persisted messages.",
|
||||
icon: "history",
|
||||
arg_hint: "[n]",
|
||||
},
|
||||
],
|
||||
});
|
||||
@ -622,7 +623,8 @@ describe("ThreadShell", () => {
|
||||
target: { value: "/" },
|
||||
});
|
||||
|
||||
expect(screen.queryByRole("listbox", { name: "Slash commands" })).not.toBeInTheDocument();
|
||||
expect(screen.getByRole("listbox", { name: "Slash commands" })).toBeInTheDocument();
|
||||
expect(screen.getByRole("option", { name: /\/history/i })).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("switches welcome quick actions when image mode is enabled", async () => {
|
||||
|
||||
@ -113,6 +113,208 @@ describe("useNanobotStream", () => {
|
||||
expect(result.current.messages[1].kind).toBeUndefined();
|
||||
});
|
||||
|
||||
it("accumulates reasoning_delta chunks on a placeholder until reasoning_end", () => {
|
||||
const fake = fakeClient();
|
||||
const { result } = renderHook(() => useNanobotStream("chat-r", EMPTY_MESSAGES), {
|
||||
wrapper: wrap(fake.client),
|
||||
});
|
||||
|
||||
act(() => {
|
||||
fake.emit("chat-r", {
|
||||
event: "reasoning_delta",
|
||||
chat_id: "chat-r",
|
||||
text: "Let me think ",
|
||||
});
|
||||
fake.emit("chat-r", {
|
||||
event: "reasoning_delta",
|
||||
chat_id: "chat-r",
|
||||
text: "step by step.",
|
||||
});
|
||||
});
|
||||
|
||||
expect(result.current.messages).toHaveLength(1);
|
||||
expect(result.current.messages[0].role).toBe("assistant");
|
||||
expect(result.current.messages[0].reasoning).toBe("Let me think step by step.");
|
||||
expect(result.current.messages[0].reasoningStreaming).toBe(true);
|
||||
|
||||
act(() => {
|
||||
fake.emit("chat-r", { event: "reasoning_end", chat_id: "chat-r" });
|
||||
});
|
||||
|
||||
expect(result.current.messages[0].reasoningStreaming).toBe(false);
|
||||
expect(result.current.messages[0].reasoning).toBe("Let me think step by step.");
|
||||
});
|
||||
|
||||
it("absorbs a streaming reasoning placeholder into the answer turn that follows", () => {
|
||||
const fake = fakeClient();
|
||||
const { result } = renderHook(() => useNanobotStream("chat-r2", EMPTY_MESSAGES), {
|
||||
wrapper: wrap(fake.client),
|
||||
});
|
||||
|
||||
act(() => {
|
||||
fake.emit("chat-r2", {
|
||||
event: "reasoning_delta",
|
||||
chat_id: "chat-r2",
|
||||
text: "Plan first.",
|
||||
});
|
||||
fake.emit("chat-r2", { event: "reasoning_end", chat_id: "chat-r2" });
|
||||
fake.emit("chat-r2", {
|
||||
event: "delta",
|
||||
chat_id: "chat-r2",
|
||||
text: "The answer is 42.",
|
||||
});
|
||||
fake.emit("chat-r2", { event: "stream_end", chat_id: "chat-r2" });
|
||||
});
|
||||
|
||||
expect(result.current.messages).toHaveLength(1);
|
||||
expect(result.current.messages[0].content).toBe("The answer is 42.");
|
||||
expect(result.current.messages[0].reasoning).toBe("Plan first.");
|
||||
expect(result.current.messages[0].reasoningStreaming).toBe(false);
|
||||
});
|
||||
|
||||
it("ignores empty reasoning_delta frames", () => {
|
||||
const fake = fakeClient();
|
||||
const { result } = renderHook(() => useNanobotStream("chat-r3", EMPTY_MESSAGES), {
|
||||
wrapper: wrap(fake.client),
|
||||
});
|
||||
|
||||
act(() => {
|
||||
fake.emit("chat-r3", {
|
||||
event: "reasoning_delta",
|
||||
chat_id: "chat-r3",
|
||||
text: "",
|
||||
});
|
||||
});
|
||||
|
||||
expect(result.current.messages).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("treats legacy kind=reasoning messages as a complete delta + end pair", () => {
|
||||
const fake = fakeClient();
|
||||
const { result } = renderHook(() => useNanobotStream("chat-r4", EMPTY_MESSAGES), {
|
||||
wrapper: wrap(fake.client),
|
||||
});
|
||||
|
||||
act(() => {
|
||||
fake.emit("chat-r4", {
|
||||
event: "message",
|
||||
chat_id: "chat-r4",
|
||||
text: "one-shot reasoning",
|
||||
kind: "reasoning",
|
||||
});
|
||||
});
|
||||
|
||||
expect(result.current.messages).toHaveLength(1);
|
||||
expect(result.current.messages[0].reasoning).toBe("one-shot reasoning");
|
||||
expect(result.current.messages[0].reasoningStreaming).toBe(false);
|
||||
});
|
||||
|
||||
it("attaches post-hoc reasoning to the same assistant turn above the answer", () => {
|
||||
const fake = fakeClient();
|
||||
const { result } = renderHook(() => useNanobotStream("chat-r5", EMPTY_MESSAGES), {
|
||||
wrapper: wrap(fake.client),
|
||||
});
|
||||
|
||||
act(() => {
|
||||
fake.emit("chat-r5", {
|
||||
event: "delta",
|
||||
chat_id: "chat-r5",
|
||||
text: "hi~",
|
||||
});
|
||||
fake.emit("chat-r5", { event: "stream_end", chat_id: "chat-r5" });
|
||||
fake.emit("chat-r5", {
|
||||
event: "reasoning_delta",
|
||||
chat_id: "chat-r5",
|
||||
text: "This reasoning arrived after the answer stream.",
|
||||
});
|
||||
fake.emit("chat-r5", { event: "reasoning_end", chat_id: "chat-r5" });
|
||||
});
|
||||
|
||||
expect(result.current.messages).toHaveLength(1);
|
||||
expect(result.current.messages[0].content).toBe("hi~");
|
||||
expect(result.current.messages[0].reasoning).toBe(
|
||||
"This reasoning arrived after the answer stream.",
|
||||
);
|
||||
expect(result.current.messages[0].reasoningStreaming).toBe(false);
|
||||
});
|
||||
|
||||
it("does not attach a new turn's reasoning across the latest user boundary", () => {
|
||||
const fake = fakeClient();
|
||||
const initialMessages = [
|
||||
{
|
||||
id: "a-prev",
|
||||
role: "assistant" as const,
|
||||
content: "Previous answer.",
|
||||
reasoning: "Previous thought.",
|
||||
createdAt: Date.now(),
|
||||
},
|
||||
{
|
||||
id: "u-next",
|
||||
role: "user" as const,
|
||||
content: "Next question",
|
||||
createdAt: Date.now(),
|
||||
},
|
||||
];
|
||||
const { result } = renderHook(
|
||||
() => useNanobotStream("chat-r6", initialMessages),
|
||||
{ wrapper: wrap(fake.client) },
|
||||
);
|
||||
|
||||
act(() => {
|
||||
fake.emit("chat-r6", {
|
||||
event: "reasoning_delta",
|
||||
chat_id: "chat-r6",
|
||||
text: "New turn thinking.",
|
||||
});
|
||||
});
|
||||
|
||||
expect(result.current.messages).toHaveLength(3);
|
||||
expect(result.current.messages[0].reasoning).toBe("Previous thought.");
|
||||
expect(result.current.messages[2].role).toBe("assistant");
|
||||
expect(result.current.messages[2].content).toBe("");
|
||||
expect(result.current.messages[2].reasoning).toBe("New turn thinking.");
|
||||
expect(result.current.messages[2].reasoningStreaming).toBe(true);
|
||||
});
|
||||
|
||||
it("does not attach reasoning across a tool trace boundary", () => {
|
||||
const fake = fakeClient();
|
||||
const { result } = renderHook(() => useNanobotStream("chat-r7", EMPTY_MESSAGES), {
|
||||
wrapper: wrap(fake.client),
|
||||
});
|
||||
|
||||
act(() => {
|
||||
fake.emit("chat-r7", {
|
||||
event: "reasoning_delta",
|
||||
chat_id: "chat-r7",
|
||||
text: "First reasoning.",
|
||||
});
|
||||
fake.emit("chat-r7", { event: "reasoning_end", chat_id: "chat-r7" });
|
||||
fake.emit("chat-r7", {
|
||||
event: "message",
|
||||
chat_id: "chat-r7",
|
||||
text: "web_search({\"query\":\"OpenClaw\"})",
|
||||
kind: "tool_hint",
|
||||
});
|
||||
fake.emit("chat-r7", {
|
||||
event: "reasoning_delta",
|
||||
chat_id: "chat-r7",
|
||||
text: "Second reasoning.",
|
||||
});
|
||||
});
|
||||
|
||||
expect(result.current.messages).toHaveLength(3);
|
||||
expect(result.current.messages.map((m) => m.kind ?? "message")).toEqual([
|
||||
"message",
|
||||
"trace",
|
||||
"message",
|
||||
]);
|
||||
expect(result.current.messages[0].reasoning).toBe("First reasoning.");
|
||||
expect(result.current.messages[1].traces).toEqual([
|
||||
"web_search({\"query\":\"OpenClaw\"})",
|
||||
]);
|
||||
expect(result.current.messages[2].reasoning).toBe("Second reasoning.");
|
||||
});
|
||||
|
||||
it("attaches assistant media_urls to complete messages", () => {
|
||||
const fake = fakeClient();
|
||||
const { result } = renderHook(() => useNanobotStream("chat-m", EMPTY_MESSAGES), {
|
||||
|
||||
@ -170,6 +170,92 @@ describe("useSessions", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
it("hydrates persisted assistant reasoning into the replayed message", async () => {
|
||||
vi.mocked(api.fetchSessionMessages).mockResolvedValue({
|
||||
key: "websocket:chat-reasoning",
|
||||
created_at: "2026-04-20T10:00:00Z",
|
||||
updated_at: "2026-04-20T10:05:00Z",
|
||||
messages: [
|
||||
{
|
||||
role: "assistant",
|
||||
content: "final answer",
|
||||
timestamp: "2026-04-20T10:00:01Z",
|
||||
reasoning_content: "hidden but persisted reasoning",
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
const { result } = renderHook(() => useSessionHistory("websocket:chat-reasoning"), {
|
||||
wrapper: wrap(fakeClient()),
|
||||
});
|
||||
|
||||
await waitFor(() => expect(result.current.loading).toBe(false));
|
||||
|
||||
expect(result.current.messages).toHaveLength(1);
|
||||
expect(result.current.messages[0].role).toBe("assistant");
|
||||
expect(result.current.messages[0].content).toBe("final answer");
|
||||
expect(result.current.messages[0].reasoning).toBe("hidden but persisted reasoning");
|
||||
expect(result.current.messages[0].reasoningStreaming).toBe(false);
|
||||
});
|
||||
|
||||
it("hydrates historical assistant tool calls into a replay trace row", async () => {
|
||||
vi.mocked(api.fetchSessionMessages).mockResolvedValue({
|
||||
key: "websocket:chat-tools",
|
||||
created_at: "2026-04-20T10:00:00Z",
|
||||
updated_at: "2026-04-20T10:05:00Z",
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: "research this",
|
||||
timestamp: "2026-04-20T10:00:00Z",
|
||||
},
|
||||
{
|
||||
role: "assistant",
|
||||
content: "",
|
||||
timestamp: "2026-04-20T10:00:01Z",
|
||||
tool_calls: [
|
||||
{
|
||||
id: "call-1",
|
||||
type: "function",
|
||||
function: { name: "web_search", arguments: "{\"query\":\"agents\"}" },
|
||||
},
|
||||
{
|
||||
id: "call-2",
|
||||
type: "function",
|
||||
function: { name: "web_fetch", arguments: "{\"url\":\"https://example.com\"}" },
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
role: "tool",
|
||||
content: "tool output that should not render directly",
|
||||
timestamp: "2026-04-20T10:00:02Z",
|
||||
tool_call_id: "call-1",
|
||||
},
|
||||
{
|
||||
role: "assistant",
|
||||
content: "summary",
|
||||
timestamp: "2026-04-20T10:00:03Z",
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
const { result } = renderHook(() => useSessionHistory("websocket:chat-tools"), {
|
||||
wrapper: wrap(fakeClient()),
|
||||
});
|
||||
|
||||
await waitFor(() => expect(result.current.loading).toBe(false));
|
||||
|
||||
expect(result.current.messages.map((m) => m.role)).toEqual(["user", "tool", "assistant"]);
|
||||
const trace = result.current.messages[1];
|
||||
expect(trace.kind).toBe("trace");
|
||||
expect(trace.traces).toEqual([
|
||||
"web_search({\"query\":\"agents\"})",
|
||||
"web_fetch({\"url\":\"https://example.com\"})",
|
||||
]);
|
||||
expect(result.current.messages[2].content).toBe("summary");
|
||||
});
|
||||
|
||||
it("flags history with trailing assistant tool calls as still pending", async () => {
|
||||
vi.mocked(api.fetchSessionMessages).mockResolvedValue({
|
||||
key: "websocket:chat-pending",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user