fix(runner): silent retry on empty response before finalization

This commit is contained in:
Xubin Ren 2026-04-07 06:58:54 +00:00 committed by Xubin Ren
parent 0355f20919
commit 02597c3ec9
3 changed files with 91 additions and 11 deletions

View File

@ -30,6 +30,7 @@ from nanobot.utils.runtime import (
) )
_DEFAULT_ERROR_MESSAGE = "Sorry, I encountered an error calling the AI model." _DEFAULT_ERROR_MESSAGE = "Sorry, I encountered an error calling the AI model."
_MAX_EMPTY_RETRIES = 2
_SNIP_SAFETY_BUFFER = 1024 _SNIP_SAFETY_BUFFER = 1024
@dataclass(slots=True) @dataclass(slots=True)
class AgentRunSpec: class AgentRunSpec:
@ -86,6 +87,7 @@ class AgentRunner:
stop_reason = "completed" stop_reason = "completed"
tool_events: list[dict[str, str]] = [] tool_events: list[dict[str, str]] = []
external_lookup_counts: dict[str, int] = {} external_lookup_counts: dict[str, int] = {}
empty_content_retries = 0
for iteration in range(spec.max_iterations): for iteration in range(spec.max_iterations):
try: try:
@ -178,15 +180,30 @@ class AgentRunner:
"pending_tool_calls": [], "pending_tool_calls": [],
}, },
) )
empty_content_retries = 0
await hook.after_iteration(context) await hook.after_iteration(context)
continue continue
clean = hook.finalize_content(context, response.content) clean = hook.finalize_content(context, response.content)
if response.finish_reason != "error" and is_blank_text(clean): if response.finish_reason != "error" and is_blank_text(clean):
empty_content_retries += 1
if empty_content_retries < _MAX_EMPTY_RETRIES:
logger.warning(
"Empty response on turn {} for {} ({}/{}); retrying",
iteration,
spec.session_key or "default",
empty_content_retries,
_MAX_EMPTY_RETRIES,
)
if hook.wants_streaming():
await hook.on_stream_end(context, resuming=False)
await hook.after_iteration(context)
continue
logger.warning( logger.warning(
"Empty final response on turn {} for {}; retrying with explicit finalization prompt", "Empty response on turn {} for {} after {} retries; attempting finalization",
iteration, iteration,
spec.session_key or "default", spec.session_key or "default",
empty_content_retries,
) )
if hook.wants_streaming(): if hook.wants_streaming():
await hook.on_stream_end(context, resuming=False) await hook.on_stream_end(context, resuming=False)

View File

@ -16,8 +16,7 @@ EMPTY_FINAL_RESPONSE_MESSAGE = (
) )
FINALIZATION_RETRY_PROMPT = ( FINALIZATION_RETRY_PROMPT = (
"You have already finished the tool work. Do not call any more tools. " "Please provide your response to the user based on the conversation above."
"Using only the conversation and tool results above, provide the final answer for the user now."
) )

View File

@ -458,6 +458,7 @@ async def test_runner_uses_raw_messages_when_context_governance_fails():
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_runner_retries_empty_final_response_with_summary_prompt(): async def test_runner_retries_empty_final_response_with_summary_prompt():
"""Empty responses get 2 silent retries before finalization kicks in."""
from nanobot.agent.runner import AgentRunSpec, AgentRunner from nanobot.agent.runner import AgentRunSpec, AgentRunner
provider = MagicMock() provider = MagicMock()
@ -465,11 +466,11 @@ async def test_runner_retries_empty_final_response_with_summary_prompt():
async def chat_with_retry(*, messages, tools=None, **kwargs): async def chat_with_retry(*, messages, tools=None, **kwargs):
calls.append({"messages": messages, "tools": tools}) calls.append({"messages": messages, "tools": tools})
if len(calls) == 1: if len(calls) <= 2:
return LLMResponse( return LLMResponse(
content=None, content=None,
tool_calls=[], tool_calls=[],
usage={"prompt_tokens": 10, "completion_tokens": 1}, usage={"prompt_tokens": 5, "completion_tokens": 1},
) )
return LLMResponse( return LLMResponse(
content="final answer", content="final answer",
@ -486,20 +487,23 @@ async def test_runner_retries_empty_final_response_with_summary_prompt():
initial_messages=[{"role": "user", "content": "do task"}], initial_messages=[{"role": "user", "content": "do task"}],
tools=tools, tools=tools,
model="test-model", model="test-model",
max_iterations=1, max_iterations=3,
max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
)) ))
assert result.final_content == "final answer" assert result.final_content == "final answer"
assert len(calls) == 2 # 2 silent retries (iterations 0,1) + finalization on iteration 1
assert calls[1]["tools"] is None assert len(calls) == 3
assert "Do not call any more tools" in calls[1]["messages"][-1]["content"] assert calls[0]["tools"] is not None
assert calls[1]["tools"] is not None
assert calls[2]["tools"] is None
assert result.usage["prompt_tokens"] == 13 assert result.usage["prompt_tokens"] == 13
assert result.usage["completion_tokens"] == 8 assert result.usage["completion_tokens"] == 9
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_runner_uses_specific_message_after_empty_finalization_retry(): async def test_runner_uses_specific_message_after_empty_finalization_retry():
"""After silent retries + finalization all return empty, stop_reason is empty_final_response."""
from nanobot.agent.runner import AgentRunSpec, AgentRunner from nanobot.agent.runner import AgentRunSpec, AgentRunner
from nanobot.utils.runtime import EMPTY_FINAL_RESPONSE_MESSAGE from nanobot.utils.runtime import EMPTY_FINAL_RESPONSE_MESSAGE
@ -517,7 +521,7 @@ async def test_runner_uses_specific_message_after_empty_finalization_retry():
initial_messages=[{"role": "user", "content": "do task"}], initial_messages=[{"role": "user", "content": "do task"}],
tools=tools, tools=tools,
model="test-model", model="test-model",
max_iterations=1, max_iterations=3,
max_tool_result_chars=_MAX_TOOL_RESULT_CHARS, max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
)) ))
@ -525,6 +529,66 @@ async def test_runner_uses_specific_message_after_empty_finalization_retry():
assert result.stop_reason == "empty_final_response" assert result.stop_reason == "empty_final_response"
@pytest.mark.asyncio
async def test_runner_empty_response_does_not_break_tool_chain():
    """A blank intermediate reply must not abort an in-progress tool chain.

    The provider is scripted as: tool_call -> empty -> tool_call -> final
    text. The runner is expected to absorb the empty turn via a silent
    retry and then complete the run normally.
    """
    from nanobot.agent.runner import AgentRunSpec, AgentRunner

    provider = MagicMock()
    turns_taken = 0

    # Scripted provider: each call advances one step through the sequence.
    async def scripted_chat(*, messages, tools=None, **kwargs):
        nonlocal turns_taken
        turns_taken += 1
        if turns_taken == 1:
            # Turn 1: request the first file read.
            return LLMResponse(
                content=None,
                tool_calls=[ToolCallRequest(id="tc1", name="read_file", arguments={"path": "a.txt"})],
                usage={"prompt_tokens": 10, "completion_tokens": 5},
            )
        if turns_taken == 2:
            # Turn 2: the blank response — the regression under test.
            return LLMResponse(content=None, tool_calls=[], usage={"prompt_tokens": 10, "completion_tokens": 1})
        if turns_taken == 3:
            # Turn 3: the chain resumes with a second file read.
            return LLMResponse(
                content=None,
                tool_calls=[ToolCallRequest(id="tc2", name="read_file", arguments={"path": "b.txt"})],
                usage={"prompt_tokens": 10, "completion_tokens": 5},
            )
        # Turn 4: the final user-facing answer.
        return LLMResponse(
            content="Here are the results.",
            tool_calls=[],
            usage={"prompt_tokens": 10, "completion_tokens": 10},
        )

    provider.chat_with_retry = scripted_chat
    provider.chat_stream_with_retry = scripted_chat

    async def fake_tool(name, args, **kw):
        return "file content"

    tool_registry = MagicMock()
    tool_registry.get_definitions.return_value = [
        {"type": "function", "function": {"name": "read_file"}}
    ]
    tool_registry.execute = AsyncMock(side_effect=fake_tool)

    runner = AgentRunner(provider)
    result = await runner.run(AgentRunSpec(
        initial_messages=[{"role": "user", "content": "read both files"}],
        tools=tool_registry,
        model="test-model",
        max_iterations=10,
        max_tool_result_chars=_MAX_TOOL_RESULT_CHARS,
    ))

    # The empty turn must not surface: the run completes with the final text,
    # all four scripted provider turns were consumed, and the tool was used.
    assert result.final_content == "Here are the results."
    assert result.stop_reason == "completed"
    assert turns_taken == 4
    assert "read_file" in result.tools_used
def test_snip_history_drops_orphaned_tool_results_from_trimmed_slice(monkeypatch): def test_snip_history_drops_orphaned_tool_results_from_trimmed_slice(monkeypatch):
from nanobot.agent.runner import AgentRunSpec, AgentRunner from nanobot.agent.runner import AgentRunSpec, AgentRunner