diff --git a/docs/configuration.md b/docs/configuration.md index 01d55c20b..01ef46814 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -677,6 +677,7 @@ Global settings that apply to all channels. Configure under the `channels` secti |---------|---------|-------------| | `sendProgress` | `true` | Stream agent's text progress to the channel | | `sendToolHints` | `false` | Stream tool-call hints (e.g. `read_file("…")`) | +| `showReasoning` | `false` | Surface model reasoning/thinking content (DeepSeek-R1 `reasoning_content`, Anthropic `thinking_blocks`, inline `<think>` tags). Independent of `sendProgress`. | | `sendMaxRetries` | `3` | Max delivery attempts per outbound message, including the initial send (0-10 configured, minimum 1 actual attempt) | | `transcriptionProvider` | `"groq"` | Voice transcription backend: `"groq"` (free tier, default) or `"openai"`. API key is auto-resolved from the matching provider config. | | `transcriptionLanguage` | `null` | Optional ISO-639-1 language hint for audio transcription, e.g. `"en"`, `"ko"`, `"ja"`. 
| diff --git a/nanobot/agent/hook.py b/nanobot/agent/hook.py index 5e4ea4d4d..86775742d 100644 --- a/nanobot/agent/hook.py +++ b/nanobot/agent/hook.py @@ -22,6 +22,7 @@ class AgentHookContext: tool_results: list[Any] = field(default_factory=list) tool_events: list[dict[str, str]] = field(default_factory=list) streamed_content: bool = False + streamed_reasoning: bool = False final_content: str | None = None stop_reason: str | None = None error: str | None = None diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index 9d2899b04..028d9ddd9 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -48,7 +48,7 @@ from nanobot.providers.factory import ProviderSnapshot from nanobot.session.manager import Session, SessionManager from nanobot.utils.artifacts import generated_image_paths_from_messages from nanobot.utils.document import extract_documents -from nanobot.utils.helpers import image_placeholder_text +from nanobot.utils.helpers import IncrementalThinkExtractor, image_placeholder_text from nanobot.utils.helpers import truncate_text as truncate_text_fn from nanobot.utils.image_generation_intent import image_generation_prompt from nanobot.utils.progress_events import ( @@ -101,22 +101,21 @@ class _LoopHook(AgentHook): self._metadata = metadata or {} self._session_key = session_key self._stream_buf = "" - self._emitted_thinking = "" + self._think_extractor = IncrementalThinkExtractor() def wants_streaming(self) -> bool: return self._on_stream is not None async def on_stream(self, context: AgentHookContext, delta: str) -> None: - from nanobot.utils.helpers import emit_incremental_think, strip_think + from nanobot.utils.helpers import strip_think prev_clean = strip_think(self._stream_buf) self._stream_buf += delta new_clean = strip_think(self._stream_buf) incremental = new_clean[len(prev_clean) :] - self._emitted_thinking = await emit_incremental_think( - self._stream_buf, self._emitted_thinking, self.emit_reasoning, - ) + if await 
self._think_extractor.feed(self._stream_buf, self.emit_reasoning): + context.streamed_reasoning = True if incremental and self._on_stream: await self._on_stream(incremental) @@ -125,7 +124,7 @@ class _LoopHook(AgentHook): if self._on_stream_end: await self._on_stream_end(resuming=resuming) self._stream_buf = "" - self._emitted_thinking = "" + self._think_extractor.reset() async def before_iteration(self, context: AgentHookContext) -> None: self._loop._current_iteration = context.iteration diff --git a/nanobot/agent/runner.py b/nanobot/agent/runner.py index 9a1cc6d65..2713359be 100644 --- a/nanobot/agent/runner.py +++ b/nanobot/agent/runner.py @@ -17,11 +17,11 @@ from nanobot.agent.tools.ask import AskUserInterrupt from nanobot.agent.tools.registry import ToolRegistry from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest from nanobot.utils.helpers import ( + IncrementalThinkExtractor, build_assistant_message, - emit_incremental_think, estimate_message_tokens, estimate_prompt_tokens_chain, - extract_think, + extract_reasoning, find_legal_message_start, maybe_persist_tool_result, strip_think, @@ -284,24 +284,15 @@ class AgentRunner: context.tool_calls = list(response.tool_calls) self._accumulate_usage(usage, raw_usage) - if response.reasoning_content: - if not context.streamed_content: - await hook.emit_reasoning(response.reasoning_content) - if response.content: - response.content = strip_think(response.content) - elif response.thinking_blocks: - # Anthropic extended thinking: extract from thinking_blocks. - if not context.streamed_content: - parts = [tb.get("thinking", "") for tb in response.thinking_blocks if tb.get("type") == "thinking"] - if parts: - await hook.emit_reasoning("\n\n".join(parts)) - elif response.content: - inline_thinking, clean_content = extract_think(response.content) - if inline_thinking: - # Only emit if streaming didn't already handle it. 
- if not context.streamed_content: - await hook.emit_reasoning(inline_thinking) - response.content = clean_content + reasoning_text, cleaned_content = extract_reasoning( + response.reasoning_content, + response.thinking_blocks, + response.content, + ) + response.content = cleaned_content + if reasoning_text and not context.streamed_reasoning: + await hook.emit_reasoning(reasoning_text) + context.streamed_reasoning = True if response.should_execute_tools: tool_calls = list(response.tool_calls) @@ -654,10 +645,10 @@ class AgentRunner: ) elif wants_progress_streaming: stream_buf = "" - emitted_thinking = "" + think_extractor = IncrementalThinkExtractor() async def _stream_progress(delta: str) -> None: - nonlocal stream_buf, emitted_thinking + nonlocal stream_buf if not delta: return prev_clean = strip_think(stream_buf) @@ -665,9 +656,8 @@ class AgentRunner: new_clean = strip_think(stream_buf) incremental = new_clean[len(prev_clean):] - emitted_thinking = await emit_incremental_think( - stream_buf, emitted_thinking, hook.emit_reasoning, - ) + if await think_extractor.feed(stream_buf, hook.emit_reasoning): + context.streamed_reasoning = True if incremental: context.streamed_content = True diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index 1c835962a..467683ed9 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -275,17 +275,17 @@ async def _maybe_print_interactive_progress( is_tool_hint = metadata.get("_tool_hint", False) is_reasoning = metadata.get("_reasoning", False) + if is_reasoning: + if channels_config and not channels_config.show_reasoning: + return True + _print_cli_reasoning(msg.content, thinking, renderer) + return True if channels_config and is_tool_hint and not channels_config.send_tool_hints: return True if channels_config and not is_tool_hint and not channels_config.send_progress: return True - if is_reasoning and channels_config and not channels_config.show_reasoning: - return True - if is_reasoning: - 
_print_cli_reasoning(msg.content, thinking, renderer) - else: - await _print_interactive_progress_line(msg.content, thinking, renderer) + await _print_interactive_progress_line(msg.content, thinking, renderer) return True @@ -1147,16 +1147,16 @@ def agent( def _make_progress(renderer: StreamRenderer | None = None): async def _cli_progress(content: str, *, tool_hint: bool = False, reasoning: bool = False, **_kwargs: Any) -> None: ch = agent_loop.channels_config + if reasoning: + if ch and not ch.show_reasoning: + return + _print_cli_reasoning(content, _thinking, renderer) + return if ch and tool_hint and not ch.send_tool_hints: return if ch and not tool_hint and not ch.send_progress: return - if reasoning and ch and not ch.show_reasoning: - return - if reasoning: - _print_cli_reasoning(content, _thinking, renderer) - else: - _print_cli_progress_line(content, _thinking, renderer) + _print_cli_progress_line(content, _thinking, renderer) return _cli_progress if message: diff --git a/nanobot/utils/helpers.py b/nanobot/utils/helpers.py index 5301f4885..f348bc183 100644 --- a/nanobot/utils/helpers.py +++ b/nanobot/utils/helpers.py @@ -72,17 +72,11 @@ def strip_think(text: str) -> str: def extract_think(text: str) -> tuple[str | None, str]: - """Extract thinking/reasoning content from and tags. + """Extract thinking content from inline ```` / ```` blocks. - Returns (thinking_text, cleaned_text) where: - - thinking_text: concatenated content from all ... and - ... blocks, or None if none found. - - cleaned_text: the input with all thinking blocks removed (same as - strip_think()). - - Only extracts from well-formed closed blocks. Unclosed trailing tags - (common during streaming) are stripped without extraction — use - strip_think() for pure streaming cleanup. + Returns ``(thinking_text, cleaned_text)``. Only closed blocks are + extracted; unclosed streaming prefixes are stripped from the cleaned + text but not surfaced — :func:`strip_think` handles that case. 
""" parts: list[str] = [] for m in re.finditer(r"([\s\S]*?)", text): @@ -93,23 +87,75 @@ def extract_think(text: str) -> tuple[str | None, str]: return thinking, strip_think(text) -async def emit_incremental_think( - buf: str, - emitted: str, - emit_fn: Any, -) -> str: - """Extract new thinking from buf and emit if not yet emitted. +class IncrementalThinkExtractor: + """Stateful inline ```` extractor for streaming buffers. - Returns the updated emitted state. *emit_fn* is an async callable - that accepts a single reasoning string (e.g. ``hook.emit_reasoning``). + Streaming providers expose only a single content delta channel. When a + model embeds reasoning in ``...`` blocks inside that + channel, callers need to surface the reasoning incrementally as it + arrives without re-emitting earlier text. This holds the "already + emitted" cursor so the runner and the loop hook share one shape. """ - thinking, _ = extract_think(buf) - if thinking and thinking != emitted: - new = thinking[len(emitted):] - if new.strip(): - await emit_fn(new.strip()) - return thinking - return emitted + + __slots__ = ("_emitted",) + + def __init__(self) -> None: + self._emitted = "" + + def reset(self) -> None: + self._emitted = "" + + async def feed(self, buf: str, emit: Any) -> bool: + """Emit any new thinking text found in ``buf``. + + Returns True if anything was emitted this call. ``emit`` is an + async callable taking a single string (typically + ``hook.emit_reasoning``). + """ + thinking, _ = extract_think(buf) + if not thinking or thinking == self._emitted: + return False + new = thinking[len(self._emitted):].strip() + self._emitted = thinking + if not new: + return False + await emit(new) + return True + + +def extract_reasoning( + reasoning_content: str | None, + thinking_blocks: list[dict[str, Any]] | None, + content: str | None, +) -> tuple[str | None, str | None]: + """Return ``(reasoning_text, cleaned_content)`` from one model response. 
+ + Single source of truth for "what reasoning did this response carry, and + what answer text remains after we peel it out". Fallback order: + + 1. Dedicated ``reasoning_content`` (DeepSeek-R1, Kimi, MiMo, OpenAI + reasoning models, Bedrock). + 2. Anthropic ``thinking_blocks``. + 3. Inline ```` / ```` blocks in ``content``. + + Only one source contributes per response; lower-priority sources are + ignored if a higher-priority one is present, but inline ```` + tags are still stripped from ``content`` so they never leak into the + final answer. + """ + if reasoning_content: + return reasoning_content, strip_think(content) if content else content + if thinking_blocks: + parts = [ + tb.get("thinking", "") + for tb in thinking_blocks + if isinstance(tb, dict) and tb.get("type") == "thinking" + ] + joined = "\n\n".join(p for p in parts if p) + return (joined or None), strip_think(content) if content else content + if content: + return extract_think(content) + return None, content def detect_image_mime(data: bytes) -> str | None: diff --git a/tests/agent/test_runner.py b/tests/agent/test_runner.py index 850e3caea..d50b82cd4 100644 --- a/tests/agent/test_runner.py +++ b/tests/agent/test_runner.py @@ -227,6 +227,111 @@ async def test_runner_prefers_reasoning_content_over_inline_think(): assert emitted_reasoning[0] == "dedicated reasoning field" +@pytest.mark.asyncio +async def test_runner_emits_reasoning_content_even_when_answer_was_streamed(): + """`reasoning_content` arrives only on the final response; streaming the + answer must not suppress it (the answer stream and the reasoning channel + are independent — only the reasoning-already-emitted bit matters).""" + from nanobot.agent.hook import AgentHook, AgentHookContext + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock() + provider.supports_progress_deltas = True + emitted_reasoning: list[str] = [] + + async def chat_stream_with_retry(*, on_content_delta=None, **kwargs): + if 
on_content_delta: + await on_content_delta("The ") + await on_content_delta("answer.") + return LLMResponse( + content="The answer.", + reasoning_content="step-by-step deduction", + tool_calls=[], + usage={"prompt_tokens": 5, "completion_tokens": 3}, + ) + + provider.chat_stream_with_retry = chat_stream_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + class ReasoningHook(AgentHook): + async def emit_reasoning(self, reasoning_content: str | None) -> None: + if reasoning_content: + emitted_reasoning.append(reasoning_content) + + progress_calls: list[str] = [] + + async def _progress(content: str, **_kwargs): + progress_calls.append(content) + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "question"}], + tools=tools, + model="test-model", + max_iterations=3, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + hook=ReasoningHook(), + stream_progress_deltas=True, + progress_callback=_progress, + )) + + assert result.final_content == "The answer." + # The answer must have streamed AND the dedicated reasoning_content must + # have been emitted exactly once after the stream completed. 
+ assert progress_calls, "answer should have streamed via progress callback" + assert emitted_reasoning == ["step-by-step deduction"] + + +@pytest.mark.asyncio +async def test_runner_does_not_double_emit_when_inline_think_already_streamed(): + """Inline `` blocks streamed incrementally during the answer + stream must not be re-emitted from the final response.""" + from nanobot.agent.hook import AgentHook, AgentHookContext + from nanobot.agent.runner import AgentRunSpec, AgentRunner + + provider = MagicMock() + provider.supports_progress_deltas = True + emitted_reasoning: list[str] = [] + + async def chat_stream_with_retry(*, on_content_delta=None, **kwargs): + if on_content_delta: + await on_content_delta("working...") + await on_content_delta("The answer.") + return LLMResponse( + content="working...The answer.", + tool_calls=[], + usage={"prompt_tokens": 5, "completion_tokens": 3}, + ) + + provider.chat_stream_with_retry = chat_stream_with_retry + tools = MagicMock() + tools.get_definitions.return_value = [] + + class ReasoningHook(AgentHook): + async def emit_reasoning(self, reasoning_content: str | None) -> None: + if reasoning_content: + emitted_reasoning.append(reasoning_content) + + async def _progress(content: str, **_kwargs): + pass + + runner = AgentRunner(provider) + result = await runner.run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "question"}], + tools=tools, + model="test-model", + max_iterations=3, + max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, + hook=ReasoningHook(), + stream_progress_deltas=True, + progress_callback=_progress, + )) + + assert result.final_content == "The answer." 
+ assert emitted_reasoning == ["working..."] + + @pytest.mark.asyncio async def test_runner_calls_hooks_in_order(): from nanobot.agent.hook import AgentHook, AgentHookContext diff --git a/tests/cli/test_interactive_retry_wait.py b/tests/cli/test_interactive_retry_wait.py index e693b057c..7ddef1c48 100644 --- a/tests/cli/test_interactive_retry_wait.py +++ b/tests/cli/test_interactive_retry_wait.py @@ -88,3 +88,26 @@ async def test_non_reasoning_progress_not_affected_by_show_reasoning(): assert handled is True assert calls == ["working on it..."] + + +@pytest.mark.asyncio +async def test_reasoning_shown_when_send_progress_disabled(): + """Reasoning display is governed by `show_reasoning` alone, independent + of `send_progress` — the two knobs are orthogonal.""" + calls: list[str] = [] + channels_config = SimpleNamespace( + send_progress=False, send_tool_hints=False, show_reasoning=True, + ) + msg = SimpleNamespace( + content="Let me think about this...", + metadata={"_progress": True, "_reasoning": True}, + ) + + with patch( + "nanobot.cli.commands._print_cli_reasoning", + side_effect=lambda t, th, r=None: calls.append(t), + ): + handled = await commands._maybe_print_interactive_progress(msg, None, channels_config) + + assert handled is True + assert calls == ["Let me think about this..."] diff --git a/tests/utils/test_strip_think.py b/tests/utils/test_strip_think.py index 65d952ad1..f1048f40c 100644 --- a/tests/utils/test_strip_think.py +++ b/tests/utils/test_strip_think.py @@ -1,4 +1,4 @@ -from nanobot.utils.helpers import extract_think, strip_think +from nanobot.utils.helpers import extract_reasoning, extract_think, strip_think class TestStripThinkTag: @@ -225,3 +225,49 @@ squares = [x**2 for x in range(10)] assert "List comprehensions in Python" in clean assert "" not in clean assert "" not in clean + + +class TestExtractReasoning: + """Single source of truth for reasoning extraction across all providers.""" + + def 
test_prefers_reasoning_content_and_strips_inline_think(self): + # Dedicated field wins; inline tags are still scrubbed from content. + reasoning, content = extract_reasoning( + "dedicated", + None, + "inlinevisible answer", + ) + assert reasoning == "dedicated" + assert content == "visible answer" + + def test_falls_back_to_thinking_blocks(self): + reasoning, content = extract_reasoning( + None, + [ + {"type": "thinking", "thinking": "step 1"}, + {"type": "thinking", "thinking": "step 2"}, + {"type": "redacted_thinking"}, + ], + "hello", + ) + assert reasoning == "step 1\n\nstep 2" + assert content == "hello" + + def test_falls_back_to_inline_think_tags(self): + reasoning, content = extract_reasoning( + None, None, "plananswer" + ) + assert reasoning == "plan" + assert content == "answer" + + def test_no_reasoning_returns_none(self): + reasoning, content = extract_reasoning(None, None, "plain answer") + assert reasoning is None + assert content == "plain answer" + + def test_empty_thinking_blocks_falls_through_to_inline(self): + reasoning, content = extract_reasoning( + None, [], "plananswer" + ) + assert reasoning == "plan" + assert content == "answer"