feat(reasoning): add inline think tag extraction and Anthropic thinking_blocks support

Add extract_think() and emit_incremental_think() helpers that extract thinking content from inline <think> and <thought> tags in the content field. This handles models served via Ollama, self-hosted vLLM, or other compatible endpoints that embed reasoning as inline tags instead of using the dedicated reasoning_content API field. Also add support for Anthropic extended thinking by reading the thinking content blocks in the thinking_blocks array.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-05-19 16:12:30 +00:00 · 2026-05-12 23:02:59 +08:00 · 2026-05-12 23:02:59 +08:00 · 3a851f8f8d
commit 3a851f8f8d
parent 3a27af0018
5 changed files with 283 additions and 4 deletions
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@ -101,17 +101,23 @@ class _LoopHook(AgentHook):
        self._metadata = metadata or {}
        self._session_key = session_key
        self._stream_buf = ""
+        self._emitted_thinking = ""

    def wants_streaming(self) -> bool:
        return self._on_stream is not None

    async def on_stream(self, context: AgentHookContext, delta: str) -> None:
-        from nanobot.utils.helpers import strip_think
+        from nanobot.utils.helpers import emit_incremental_think, strip_think

        prev_clean = strip_think(self._stream_buf)
        self._stream_buf += delta
        new_clean = strip_think(self._stream_buf)
        incremental = new_clean[len(prev_clean) :]
+
+        self._emitted_thinking = await emit_incremental_think(
+            self._stream_buf, self._emitted_thinking, self.emit_reasoning,
+        )
+
        if incremental and self._on_stream:
            await self._on_stream(incremental)

@ -119,6 +125,7 @@ class _LoopHook(AgentHook):
        if self._on_stream_end:
            await self._on_stream_end(resuming=resuming)
        self._stream_buf = ""
+        self._emitted_thinking = ""

    async def before_iteration(self, context: AgentHookContext) -> None:
        self._loop._current_iteration = context.iteration
--- a/nanobot/agent/runner.py
+++ b/nanobot/agent/runner.py
@ -18,8 +18,10 @@ from nanobot.agent.tools.registry import ToolRegistry
 from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
 from nanobot.utils.helpers import (
    build_assistant_message,
+    emit_incremental_think,
    estimate_message_tokens,
    estimate_prompt_tokens_chain,
+    extract_think,
    find_legal_message_start,
    maybe_persist_tool_result,
    strip_think,
@ -283,7 +285,23 @@ class AgentRunner:
            self._accumulate_usage(usage, raw_usage)

            if response.reasoning_content:
-                await hook.emit_reasoning(response.reasoning_content)
+                if not context.streamed_content:
+                    await hook.emit_reasoning(response.reasoning_content)
+                if response.content:
+                    response.content = strip_think(response.content)
+            elif response.thinking_blocks:
+                # Anthropic extended thinking: extract from thinking_blocks.
+                if not context.streamed_content:
+                    parts = [tb.get("thinking", "") for tb in response.thinking_blocks if tb.get("type") == "thinking"]
+                    if parts:
+                        await hook.emit_reasoning("\n\n".join(parts))
+            elif response.content:
+                inline_thinking, clean_content = extract_think(response.content)
+                if inline_thinking:
+                    # Only emit if streaming didn't already handle it.
+                    if not context.streamed_content:
+                        await hook.emit_reasoning(inline_thinking)
+                    response.content = clean_content

            if response.should_execute_tools:
                tool_calls = list(response.tool_calls)
@ -636,15 +654,21 @@ class AgentRunner:
            )
        elif wants_progress_streaming:
            stream_buf = ""
+            emitted_thinking = ""

            async def _stream_progress(delta: str) -> None:
-                nonlocal stream_buf
+                nonlocal stream_buf, emitted_thinking
                if not delta:
                    return
                prev_clean = strip_think(stream_buf)
                stream_buf += delta
                new_clean = strip_think(stream_buf)
                incremental = new_clean[len(prev_clean):]
+
+                emitted_thinking = await emit_incremental_think(
+                    stream_buf, emitted_thinking, hook.emit_reasoning,
+                )
+
                if incremental:
                    context.streamed_content = True
                    await spec.progress_callback(incremental)
--- a/nanobot/utils/helpers.py
+++ b/nanobot/utils/helpers.py
@ -71,6 +71,47 @@ def strip_think(text: str) -> str:
    return text.strip()


+def extract_think(text: str) -> tuple[str | None, str]:
+    """Extract thinking/reasoning content from <think> and <thought> tags.
+
+    Returns (thinking_text, cleaned_text) where:
+      - thinking_text: concatenated content from all <think>...</think> and
+        <thought>...</thought> blocks, or None if none found.
+      - cleaned_text: the input with all thinking blocks removed (same as
+        strip_think()).
+
+    Only extracts from well-formed closed blocks. Unclosed trailing tags
+    (common during streaming) are stripped without extraction — use
+    strip_think() for pure streaming cleanup.
+    """
+    parts: list[str] = []
+    for m in re.finditer(r"<think>([\s\S]*?)</think>", text):
+        parts.append(m.group(1).strip())
+    for m in re.finditer(r"<thought>([\s\S]*?)</thought>", text):
+        parts.append(m.group(1).strip())
+    thinking = "\n\n".join(parts) if parts else None
+    return thinking, strip_think(text)
+
+
+async def emit_incremental_think(
+    buf: str,
+    emitted: str,
+    emit_fn: Any,
+) -> str:
+    """Extract new thinking from buf and emit if not yet emitted.
+
+    Returns the updated emitted state.  *emit_fn* is an async callable
+    that accepts a single reasoning string (e.g. ``hook.emit_reasoning``).
+    """
+    thinking, _ = extract_think(buf)
+    if thinking and thinking != emitted:
+        new = thinking[len(emitted):]
+        if new.strip():
+            await emit_fn(new.strip())
+        return thinking
+    return emitted
+
+
 def detect_image_mime(data: bytes) -> str | None:
    """Detect image MIME type from magic bytes, ignoring file extension."""
    if data[:8] == b"\x89PNG\r\n\x1a\n":
--- a/tests/agent/test_runner.py
+++ b/tests/agent/test_runner.py
@ -101,6 +101,132 @@ async def test_runner_preserves_reasoning_fields_and_tool_results():
    )


+@pytest.mark.asyncio
+async def test_runner_emits_anthropic_thinking_blocks():
+    """A response carrying thinking_blocks should yield a single reasoning emission containing every block."""
+    from nanobot.agent.hook import AgentHook, AgentHookContext
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock()
+    # Collects every non-empty string passed to the hook's emit_reasoning().
+    emitted_reasoning: list[str] = []
+
+    # Stub provider call: plain content plus two "thinking" blocks and no
+    # reasoning_content field.
+    async def chat_with_retry(**kwargs):
+        return LLMResponse(
+            content="The answer is 42.",
+            thinking_blocks=[
+                {"type": "thinking", "thinking": "Let me analyze this step by step.", "signature": "sig1"},
+                {"type": "thinking", "thinking": "After careful consideration.", "signature": "sig2"},
+            ],
+            tool_calls=[],
+            usage={"prompt_tokens": 5, "completion_tokens": 3},
+        )
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    class ReasoningHook(AgentHook):
+        async def emit_reasoning(self, reasoning_content: str | None) -> None:
+            if reasoning_content:
+                emitted_reasoning.append(reasoning_content)
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "question"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=3,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        hook=ReasoningHook(),
+    ))
+
+    # Content is passed through unchanged; both blocks arrive in one emission.
+    assert result.final_content == "The answer is 42."
+    assert len(emitted_reasoning) == 1
+    assert "Let me analyze this" in emitted_reasoning[0]
+    assert "After careful consideration" in emitted_reasoning[0]
+
+
+@pytest.mark.asyncio
+async def test_runner_emits_inline_think_content_as_reasoning():
+    """Models returning <think>...</think> in content should have thinking extracted and emitted."""
+    from nanobot.agent.hook import AgentHook, AgentHookContext
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock()
+    # Collects every non-empty string passed to the hook's emit_reasoning().
+    emitted_reasoning: list[str] = []
+
+    # Stub provider call: reasoning is embedded inline in content; neither
+    # reasoning_content nor thinking_blocks is supplied.
+    async def chat_with_retry(**kwargs):
+        return LLMResponse(
+            content="<think>Let me think about this...\nThe answer is 42.</think>The answer is 42.",
+            tool_calls=[],
+            usage={"prompt_tokens": 5, "completion_tokens": 3},
+        )
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    class ReasoningHook(AgentHook):
+        async def emit_reasoning(self, reasoning_content: str | None) -> None:
+            if reasoning_content:
+                emitted_reasoning.append(reasoning_content)
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "what is the answer?"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=3,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        hook=ReasoningHook(),
+    ))
+
+    # The tag and its body are removed from the final content, and the body
+    # is emitted exactly once as reasoning.
+    assert result.final_content == "The answer is 42."
+    assert len(emitted_reasoning) == 1
+    assert "Let me think about this" in emitted_reasoning[0]
+    assert "The answer is 42" in emitted_reasoning[0]
+
+
+@pytest.mark.asyncio
+async def test_runner_prefers_reasoning_content_over_inline_think():
+    """When both reasoning_content and an inline <think> block are present, only reasoning_content is emitted."""
+    from nanobot.agent.hook import AgentHook, AgentHookContext
+    from nanobot.agent.runner import AgentRunSpec, AgentRunner
+
+    provider = MagicMock()
+    # Collects every non-empty string passed to the hook's emit_reasoning().
+    emitted_reasoning: list[str] = []
+
+    # Stub provider call: supplies the dedicated reasoning field AND an
+    # inline <think> block in content.
+    async def chat_with_retry(**kwargs):
+        return LLMResponse(
+            content="<think>inline thinking</think>The answer.",
+            reasoning_content="dedicated reasoning field",
+            tool_calls=[],
+            usage={"prompt_tokens": 5, "completion_tokens": 3},
+        )
+
+    provider.chat_with_retry = chat_with_retry
+    tools = MagicMock()
+    tools.get_definitions.return_value = []
+
+    class ReasoningHook(AgentHook):
+        async def emit_reasoning(self, reasoning_content: str | None) -> None:
+            if reasoning_content:
+                emitted_reasoning.append(reasoning_content)
+
+    runner = AgentRunner(provider)
+    result = await runner.run(AgentRunSpec(
+        initial_messages=[{"role": "user", "content": "question"}],
+        tools=tools,
+        model="test-model",
+        max_iterations=3,
+        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
+        hook=ReasoningHook(),
+    ))
+
+    # The inline block is still removed from the final content.
+    assert result.final_content == "The answer."
+    # Only the dedicated field should be emitted, not the inline <think> content
+    assert len(emitted_reasoning) == 1
+    assert emitted_reasoning[0] == "dedicated reasoning field"
+
+
@pytest.mark.asyncio
 async def test_runner_calls_hooks_in_order():
    from nanobot.agent.hook import AgentHook, AgentHookContext
--- a/tests/utils/test_strip_think.py
+++ b/tests/utils/test_strip_think.py
@ -1,4 +1,4 @@
-from nanobot.utils.helpers import strip_think
+from nanobot.utils.helpers import extract_think, strip_think


 class TestStripThinkTag:
@ -144,3 +144,84 @@ class TestStripThinkConservativePreserve:
    def test_literal_channel_marker_in_code_block_preserved(self):
        text = "Example:\n```\nif line.startswith('<channel|>'):\n    skip()\n```"
        assert strip_think(text) == text
+
+
+class TestExtractThink:
+    """Tests for extract_think(), which returns a (thinking, cleaned_text) pair."""
+
+    def test_no_think_tags(self):
+        # Without any tags, thinking is None and the text is returned as-is.
+        thinking, clean = extract_think("Hello World")
+        assert thinking is None
+        assert clean == "Hello World"
+
+    def test_single_think_block(self):
+        # Newlines inside the block are kept; the block is cut out of the
+        # text, leaving the spaces that surrounded it.
+        text = "Hello <think>reasoning content\nhere</think> World"
+        thinking, clean = extract_think(text)
+        assert thinking == "reasoning content\nhere"
+        assert clean == "Hello  World"
+
+    def test_single_thought_block(self):
+        # <thought> is handled the same way as <think>.
+        text = "Hello <thought>reasoning content</thought> World"
+        thinking, clean = extract_think(text)
+        assert thinking == "reasoning content"
+        assert clean == "Hello  World"
+
+    def test_multiple_think_blocks(self):
+        # Several blocks are joined with a blank line.
+        text = "A<think>first</think>B<thought>second</thought>C"
+        thinking, clean = extract_think(text)
+        assert thinking == "first\n\nsecond"
+        assert clean == "ABC"
+
+    def test_think_only_no_content(self):
+        # Input consisting solely of a thinking block leaves empty cleaned text.
+        text = "<think>just thinking</think>"
+        thinking, clean = extract_think(text)
+        assert thinking == "just thinking"
+        assert clean == ""
+
+    def test_unclosed_think_not_extracted(self):
+        # Unclosed blocks at start are stripped but NOT extracted
+        text = "<think>unclosed thinking..."
+        thinking, clean = extract_think(text)
+        assert thinking is None
+        assert clean == ""
+
+    def test_empty_think_block(self):
+        text = "Hello <think></think> World"
+        thinking, clean = extract_think(text)
+        # An empty closed block still counts as found, so the result is the
+        # empty string rather than None.
+        assert thinking == ""
+        assert clean == "Hello  World"
+
+    def test_think_with_whitespace_only(self):
+        # NOTE: despite the name, this input has an unclosed <think> tag in
+        # the middle of the text.  Nothing is extracted and the text is
+        # expected to come back unchanged.
+        text = "Hello <think>   \n World"
+        thinking, clean = extract_think(text)
+        assert thinking is None
+        assert clean == "Hello <think>   \n World"
+
+    def test_mixed_think_and_thought(self):
+        # Both tag kinds in one input, with ordinary text between them.
+        text = "Start<think>first reasoning</think>middle<thought>second reasoning</thought>End"
+        thinking, clean = extract_think(text)
+        assert thinking == "first reasoning\n\nsecond reasoning"
+        assert clean == "StartmiddleEnd"
+
+    def test_real_world_ollama_response(self):
+        # Multi-line thinking block followed by a markdown answer that
+        # contains fenced code blocks.
+        text = """<think>
+The user is asking about Python list comprehensions.
+Let me explain the syntax and give examples.
+</think>
+
+List comprehensions in Python provide a concise way to create lists. Here's the syntax:
+
+```python
+[expression for item in iterable if condition]
+```
+
+For example:
+```python
+squares = [x**2 for x in range(10)]
+```"""
+        thinking, clean = extract_think(text)
+        assert "list comprehensions" in thinking.lower()
+        assert "Let me explain" in thinking
+        assert "List comprehensions in Python" in clean
+        assert "<think>" not in clean
+        assert "</think>" not in clean