mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-20 08:32:25 +00:00
feat(reasoning): add inline think tag extraction and Anthropic thinking_blocks support
Add extract_think() and emit_incremental_think() helpers to extract thinking content from inline <think> and <thought> tags in the content field. This handles models served via Ollama, self-hosted vLLM, or other compatible endpoints that embed reasoning as inline tags instead of using the dedicated reasoning_content API field. Also adds Anthropic thinking_blocks support for extended thinking via the thinking content blocks array. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
parent
3a27af0018
commit
3a851f8f8d
@ -101,17 +101,23 @@ class _LoopHook(AgentHook):
|
|||||||
self._metadata = metadata or {}
|
self._metadata = metadata or {}
|
||||||
self._session_key = session_key
|
self._session_key = session_key
|
||||||
self._stream_buf = ""
|
self._stream_buf = ""
|
||||||
|
self._emitted_thinking = ""
|
||||||
|
|
||||||
def wants_streaming(self) -> bool:
    """Report whether a stream callback has been configured on this hook."""
    if self._on_stream is None:
        return False
    return True
|
||||||
|
|
||||||
async def on_stream(self, context: AgentHookContext, delta: str) -> None:
    """Handle one streamed chunk of model output.

    The chunk is appended to the hook's raw stream buffer. Any thinking
    that ``emit_incremental_think`` finds in the buffer and that has not
    been sent yet goes to ``self.emit_reasoning``; the text that became
    visible after ``strip_think`` is forwarded to the ``_on_stream``
    callback. *context* is not used here.
    """
    from nanobot.utils.helpers import emit_incremental_think, strip_think

    # Measure the visible text before and after appending so that only the
    # newly exposed part is forwarded.
    visible_before = len(strip_think(self._stream_buf))
    self._stream_buf += delta
    fresh_text = strip_think(self._stream_buf)[visible_before:]

    self._emitted_thinking = await emit_incremental_think(
        self._stream_buf, self._emitted_thinking, self.emit_reasoning,
    )

    if not fresh_text or not self._on_stream:
        return
    await self._on_stream(fresh_text)
|
||||||
|
|
||||||
@ -119,6 +125,7 @@ class _LoopHook(AgentHook):
|
|||||||
if self._on_stream_end:
|
if self._on_stream_end:
|
||||||
await self._on_stream_end(resuming=resuming)
|
await self._on_stream_end(resuming=resuming)
|
||||||
self._stream_buf = ""
|
self._stream_buf = ""
|
||||||
|
self._emitted_thinking = ""
|
||||||
|
|
||||||
async def before_iteration(self, context: AgentHookContext) -> None:
|
async def before_iteration(self, context: AgentHookContext) -> None:
|
||||||
self._loop._current_iteration = context.iteration
|
self._loop._current_iteration = context.iteration
|
||||||
|
|||||||
@ -18,8 +18,10 @@ from nanobot.agent.tools.registry import ToolRegistry
|
|||||||
from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
|
from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest
|
||||||
from nanobot.utils.helpers import (
|
from nanobot.utils.helpers import (
|
||||||
build_assistant_message,
|
build_assistant_message,
|
||||||
|
emit_incremental_think,
|
||||||
estimate_message_tokens,
|
estimate_message_tokens,
|
||||||
estimate_prompt_tokens_chain,
|
estimate_prompt_tokens_chain,
|
||||||
|
extract_think,
|
||||||
find_legal_message_start,
|
find_legal_message_start,
|
||||||
maybe_persist_tool_result,
|
maybe_persist_tool_result,
|
||||||
strip_think,
|
strip_think,
|
||||||
@ -283,7 +285,23 @@ class AgentRunner:
|
|||||||
self._accumulate_usage(usage, raw_usage)
|
self._accumulate_usage(usage, raw_usage)
|
||||||
|
|
||||||
if response.reasoning_content:
|
if response.reasoning_content:
|
||||||
|
if not context.streamed_content:
|
||||||
await hook.emit_reasoning(response.reasoning_content)
|
await hook.emit_reasoning(response.reasoning_content)
|
||||||
|
if response.content:
|
||||||
|
response.content = strip_think(response.content)
|
||||||
|
elif response.thinking_blocks:
|
||||||
|
# Anthropic extended thinking: extract from thinking_blocks.
|
||||||
|
if not context.streamed_content:
|
||||||
|
parts = [tb.get("thinking", "") for tb in response.thinking_blocks if tb.get("type") == "thinking"]
|
||||||
|
if parts:
|
||||||
|
await hook.emit_reasoning("\n\n".join(parts))
|
||||||
|
elif response.content:
|
||||||
|
inline_thinking, clean_content = extract_think(response.content)
|
||||||
|
if inline_thinking:
|
||||||
|
# Only emit if streaming didn't already handle it.
|
||||||
|
if not context.streamed_content:
|
||||||
|
await hook.emit_reasoning(inline_thinking)
|
||||||
|
response.content = clean_content
|
||||||
|
|
||||||
if response.should_execute_tools:
|
if response.should_execute_tools:
|
||||||
tool_calls = list(response.tool_calls)
|
tool_calls = list(response.tool_calls)
|
||||||
@ -636,15 +654,21 @@ class AgentRunner:
|
|||||||
)
|
)
|
||||||
elif wants_progress_streaming:
|
elif wants_progress_streaming:
|
||||||
stream_buf = ""
|
stream_buf = ""
|
||||||
|
emitted_thinking = ""
|
||||||
|
|
||||||
async def _stream_progress(delta: str) -> None:
|
async def _stream_progress(delta: str) -> None:
|
||||||
nonlocal stream_buf
|
nonlocal stream_buf, emitted_thinking
|
||||||
if not delta:
|
if not delta:
|
||||||
return
|
return
|
||||||
prev_clean = strip_think(stream_buf)
|
prev_clean = strip_think(stream_buf)
|
||||||
stream_buf += delta
|
stream_buf += delta
|
||||||
new_clean = strip_think(stream_buf)
|
new_clean = strip_think(stream_buf)
|
||||||
incremental = new_clean[len(prev_clean):]
|
incremental = new_clean[len(prev_clean):]
|
||||||
|
|
||||||
|
emitted_thinking = await emit_incremental_think(
|
||||||
|
stream_buf, emitted_thinking, hook.emit_reasoning,
|
||||||
|
)
|
||||||
|
|
||||||
if incremental:
|
if incremental:
|
||||||
context.streamed_content = True
|
context.streamed_content = True
|
||||||
await spec.progress_callback(incremental)
|
await spec.progress_callback(incremental)
|
||||||
|
|||||||
@ -71,6 +71,47 @@ def strip_think(text: str) -> str:
|
|||||||
return text.strip()
|
return text.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def extract_think(text: str) -> tuple[str | None, str]:
    """Extract thinking/reasoning content from <think> and <thought> tags.

    Returns (thinking_text, cleaned_text) where:
    - thinking_text: the stripped content of every closed
      <think>...</think> and <thought>...</thought> block, joined with a
      blank line in the order the blocks appear in *text*, or None if no
      closed block is found.
    - cleaned_text: the result of ``strip_think(text)``.

    Only well-formed closed blocks are extracted; an opening tag without
    its matching closing tag contributes nothing to thinking_text. A block
    nested inside a block of the other kind is reported once, as part of
    the outer block's content.
    """
    # A single pass with a back-reference keeps blocks in document order and
    # requires the closing tag to match the opening one. Scanning the two tag
    # kinds separately would list every <think> before any <thought>
    # regardless of where they appear in the text.
    parts: list[str] = [
        m.group(2).strip()
        for m in re.finditer(r"<(think|thought)>([\s\S]*?)</\1>", text)
    ]
    thinking = "\n\n".join(parts) if parts else None
    return thinking, strip_think(text)
|
||||||
|
|
||||||
|
|
||||||
|
async def emit_incremental_think(
    buf: str,
    emitted: str,
    emit_fn: Any,
) -> str:
    """Emit thinking found in *buf* that has not been emitted yet.

    *buf* is the accumulated raw text, *emitted* is the value returned by
    the previous call ("" before anything was emitted), and *emit_fn* is an
    async callable that accepts a single reasoning string
    (e.g. ``hook.emit_reasoning``).

    Returns the updated emitted state: the full thinking text extracted
    from *buf* when it is non-empty and differs from *emitted*, otherwise
    *emitted* unchanged.
    """
    thinking, _ = extract_think(buf)
    if not thinking or thinking == emitted:
        return emitted

    if thinking.startswith(emitted):
        # Normal case: the extracted thinking only grew, so send the tail.
        new = thinking[len(emitted):]
    else:
        # The extracted text no longer extends what was sent (for example
        # an earlier block was completed late). Slicing by length would drop
        # new reasoning and resend an unrelated tail, so send it whole.
        new = thinking

    new = new.strip()
    if new:
        await emit_fn(new)
    return thinking
|
||||||
|
|
||||||
|
|
||||||
def detect_image_mime(data: bytes) -> str | None:
|
def detect_image_mime(data: bytes) -> str | None:
|
||||||
"""Detect image MIME type from magic bytes, ignoring file extension."""
|
"""Detect image MIME type from magic bytes, ignoring file extension."""
|
||||||
if data[:8] == b"\x89PNG\r\n\x1a\n":
|
if data[:8] == b"\x89PNG\r\n\x1a\n":
|
||||||
|
|||||||
@ -101,6 +101,132 @@ async def test_runner_preserves_reasoning_fields_and_tool_results():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_runner_emits_anthropic_thinking_blocks():
    """A response carrying two thinking blocks yields one reasoning emission containing both."""
    from nanobot.agent.hook import AgentHook, AgentHookContext
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    captured: list[str] = []

    class ReasoningHook(AgentHook):
        # Records every non-empty reasoning string the runner emits.
        async def emit_reasoning(self, reasoning_content: str | None) -> None:
            if reasoning_content:
                captured.append(reasoning_content)

    async def chat_with_retry(**kwargs):
        blocks = [
            {"type": "thinking", "thinking": "Let me analyze this step by step.", "signature": "sig1"},
            {"type": "thinking", "thinking": "After careful consideration.", "signature": "sig2"},
        ]
        return LLMResponse(
            content="The answer is 42.",
            thinking_blocks=blocks,
            tool_calls=[],
            usage={"prompt_tokens": 5, "completion_tokens": 3},
        )

    provider = MagicMock()
    provider.chat_with_retry = chat_with_retry
    tools = MagicMock()
    tools.get_definitions.return_value = []

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "question"}],
        tools=tools,
        model="test-model",
        max_iterations=3,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        hook=ReasoningHook(),
    )
    result = await runner.run(spec)

    assert result.final_content == "The answer is 42."
    assert len(captured) == 1
    only_emission = captured[0]
    assert "Let me analyze this" in only_emission
    assert "After careful consideration" in only_emission
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_runner_emits_inline_think_content_as_reasoning():
    """Models returning <think>...</think> in content should have thinking extracted and emitted."""
    from nanobot.agent.hook import AgentHook, AgentHookContext
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    captured: list[str] = []

    class ReasoningHook(AgentHook):
        # Records every non-empty reasoning string the runner emits.
        async def emit_reasoning(self, reasoning_content: str | None) -> None:
            if reasoning_content:
                captured.append(reasoning_content)

    async def chat_with_retry(**kwargs):
        return LLMResponse(
            content="<think>Let me think about this...\nThe answer is 42.</think>The answer is 42.",
            tool_calls=[],
            usage={"prompt_tokens": 5, "completion_tokens": 3},
        )

    provider = MagicMock()
    provider.chat_with_retry = chat_with_retry
    tools = MagicMock()
    tools.get_definitions.return_value = []

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "what is the answer?"}],
        tools=tools,
        model="test-model",
        max_iterations=3,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        hook=ReasoningHook(),
    )
    result = await runner.run(spec)

    # The final content has the inline block removed; the block's text is
    # emitted once as reasoning.
    assert result.final_content == "The answer is 42."
    assert len(captured) == 1
    only_emission = captured[0]
    assert "Let me think about this" in only_emission
    assert "The answer is 42" in only_emission
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_runner_prefers_reasoning_content_over_inline_think():
    """With both a reasoning_content field and an inline <think> block, only the field is emitted."""
    from nanobot.agent.hook import AgentHook, AgentHookContext
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    captured: list[str] = []

    class ReasoningHook(AgentHook):
        # Records every non-empty reasoning string the runner emits.
        async def emit_reasoning(self, reasoning_content: str | None) -> None:
            if reasoning_content:
                captured.append(reasoning_content)

    async def chat_with_retry(**kwargs):
        return LLMResponse(
            content="<think>inline thinking</think>The answer.",
            reasoning_content="dedicated reasoning field",
            tool_calls=[],
            usage={"prompt_tokens": 5, "completion_tokens": 3},
        )

    provider = MagicMock()
    provider.chat_with_retry = chat_with_retry
    tools = MagicMock()
    tools.get_definitions.return_value = []

    runner = AgentRunner(provider)
    spec = AgentRunSpec(
        initial_messages=[{"role": "user", "content": "question"}],
        tools=tools,
        model="test-model",
        max_iterations=3,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
        hook=ReasoningHook(),
    )
    result = await runner.run(spec)

    assert result.final_content == "The answer."
    # Only the dedicated field should be emitted, not the inline <think> content
    assert len(captured) == 1
    assert captured[0] == "dedicated reasoning field"
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_runner_calls_hooks_in_order():
|
async def test_runner_calls_hooks_in_order():
|
||||||
from nanobot.agent.hook import AgentHook, AgentHookContext
|
from nanobot.agent.hook import AgentHook, AgentHookContext
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
from nanobot.utils.helpers import strip_think
|
from nanobot.utils.helpers import extract_think, strip_think
|
||||||
|
|
||||||
|
|
||||||
class TestStripThinkTag:
|
class TestStripThinkTag:
|
||||||
@ -144,3 +144,84 @@ class TestStripThinkConservativePreserve:
|
|||||||
def test_literal_channel_marker_in_code_block_preserved(self):
    # A '<channel|>' marker quoted inside a fenced code sample must come
    # back from strip_think unchanged.
    text = "Example:\n```\nif line.startswith('<channel|>'):\n skip()\n```"
    assert strip_think(text) == text
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractThink:
    # Checks for extract_think(): which reasoning text is returned and what
    # the cleaned text looks like for closed, unclosed and empty tag blocks.

    def test_no_think_tags(self):
        # Text without tags: nothing extracted, text returned as given.
        thinking, clean = extract_think("Hello World")
        assert thinking is None
        assert clean == "Hello World"

    def test_single_think_block(self):
        # A closed <think> block is extracted, inner newline included.
        text = "Hello <think>reasoning content\nhere</think> World"
        thinking, clean = extract_think(text)
        assert thinking == "reasoning content\nhere"
        assert clean == "Hello World"

    def test_single_thought_block(self):
        # <thought> is handled the same way as <think>.
        text = "Hello <thought>reasoning content</thought> World"
        thinking, clean = extract_think(text)
        assert thinking == "reasoning content"
        assert clean == "Hello World"

    def test_multiple_think_blocks(self):
        # Several blocks are joined with a blank line between them.
        text = "A<think>first</think>B<thought>second</thought>C"
        thinking, clean = extract_think(text)
        assert thinking == "first\n\nsecond"
        assert clean == "ABC"

    def test_think_only_no_content(self):
        # Input consisting only of a thinking block leaves empty cleaned text.
        text = "<think>just thinking</think>"
        thinking, clean = extract_think(text)
        assert thinking == "just thinking"
        assert clean == ""

    def test_unclosed_think_not_extracted(self):
        # Unclosed blocks at start are stripped but NOT extracted
        text = "<think>unclosed thinking..."
        thinking, clean = extract_think(text)
        assert thinking is None
        assert clean == ""

    def test_empty_think_block(self):
        # A closed block with no content still counts as a block.
        text = "Hello <think></think> World"
        thinking, clean = extract_think(text)
        # Empty blocks result in empty string after strip
        assert thinking == ""
        assert clean == "Hello World"

    def test_think_with_whitespace_only(self):
        # Opening tag in the middle of the text with no closing tag: nothing
        # is extracted and the text is returned as given.
        text = "Hello <think> \n World"
        thinking, clean = extract_think(text)
        assert thinking is None
        assert clean == "Hello <think> \n World"

    def test_mixed_think_and_thought(self):
        # Both tag kinds in one input, <think> first, joined in that order.
        text = "Start<think>first reasoning</think>middle<thought>second reasoning</thought>End"
        thinking, clean = extract_think(text)
        assert thinking == "first reasoning\n\nsecond reasoning"
        assert clean == "StartmiddleEnd"

    def test_real_world_ollama_response(self):
        # Multi-line response that opens with a thinking block followed by
        # a markdown answer containing fenced code.
        text = """<think>
The user is asking about Python list comprehensions.
Let me explain the syntax and give examples.
</think>

List comprehensions in Python provide a concise way to create lists. Here's the syntax:

```python
[expression for item in iterable if condition]
```

For example:
```python
squares = [x**2 for x in range(10)]
```"""
        thinking, clean = extract_think(text)
        assert "list comprehensions" in thinking.lower()
        assert "Let me explain" in thinking
        assert "List comprehensions in Python" in clean
        # No tag remnants may remain in the cleaned answer.
        assert "<think>" not in clean
        assert "</think>" not in clean
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user