From 41f7eae7b4a7427c83b881f6f79f6e1001956179 Mon Sep 17 00:00:00 2001 From: choiking Date: Sat, 25 Apr 2026 16:58:05 +0800 Subject: [PATCH 01/52] docs: add macOS launchd gateway setup --- docs/deployment.md | 86 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/docs/deployment.md b/docs/deployment.md index ad6283c02..337739726 100644 --- a/docs/deployment.md +++ b/docs/deployment.md @@ -92,3 +92,89 @@ If you edit the `.service` file itself, run `systemctl --user daemon-reload` bef > ```bash > loginctl enable-linger $USER > ``` + +## macOS LaunchAgent + +On macOS, run the gateway as a `launchd` user agent so it starts automatically after login and restarts if it exits unexpectedly. + +**1. Find the nanobot binary path:** + +```bash +which nanobot # e.g. /Users/youruser/.local/bin/nanobot +``` + +If you installed nanobot with `uv tool`, you may also want the Python path for `ProgramArguments`: + +```bash +which python +``` + +**2. Create the LaunchAgent plist** at `~/Library/LaunchAgents/ai.nanobot.gateway.plist` (replace paths if needed): + +```xml + + + + + Label + ai.nanobot.gateway + + ProgramArguments + + /Users/youruser/.local/share/uv/tools/nanobot-ai/bin/python + /Users/youruser/.local/bin/nanobot + gateway + --workspace + /Users/youruser/.nanobot/workspace + + + WorkingDirectory + /Users/youruser/.nanobot/workspace + + RunAtLoad + + + KeepAlive + + SuccessfulExit + + + + StandardOutPath + /Users/youruser/.nanobot/logs/gateway.log + + StandardErrorPath + /Users/youruser/.nanobot/logs/gateway.error.log + + EnvironmentVariables + + PATH + /Users/youruser/.local/bin:/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin + PYTHONUNBUFFERED + 1 + + + +``` + +**3. Load and start it:** + +```bash +mkdir -p ~/.nanobot/logs +launchctl bootstrap gui/$(id -u) ~/Library/LaunchAgents/ai.nanobot.gateway.plist +launchctl enable gui/$(id -u)/ai.nanobot.gateway +launchctl kickstart -k gui/$(id -u)/ai.nanobot.gateway +``` + +**Common operations:** + +```bash +launchctl list | grep ai.nanobot.gateway +launchctl kickstart -k gui/$(id -u)/ai.nanobot.gateway +launchctl bootout gui/$(id -u) ~/Library/LaunchAgents/ai.nanobot.gateway.plist +log stream --process nanobot +``` + +If you edit the plist itself, run `launchctl bootout ...` and `launchctl bootstrap ...` again so `launchd` reloads the updated definition. + +> **Note:** if `launchctl kickstart` fails with an "address already in use" error, you probably still have a manually started `nanobot gateway` process running on the same port. Stop the manual process first, then kickstart the LaunchAgent again. From 8a4c338a01b7adc1f35f9f75e741bd7659f3bb5f Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Sat, 25 Apr 2026 11:21:16 +0000 Subject: [PATCH 02/52] docs: tighten macOS launchd setup Made-with: Cursor --- docs/README.md | 2 +- docs/deployment.md | 7 +------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/docs/README.md b/docs/README.md index 6a3c9bd07..d8ff30247 100644 --- a/docs/README.md +++ b/docs/README.md @@ -18,7 +18,7 @@ Start here for setup, everyday usage, and deployment. | CLI reference | [`cli-reference.md`](./cli-reference.md) | Core CLI commands and common entrypoints | | In-chat commands | [`chat-commands.md`](./chat-commands.md) | Slash commands and periodic task behavior | | OpenAI-compatible API | [`openai-api.md`](./openai-api.md) | Local API endpoints, request format, and file uploads | -| Deployment | [`deployment.md`](./deployment.md) | Docker and Linux service setup | +| Deployment | [`deployment.md`](./deployment.md) | Docker, Linux service, and macOS LaunchAgent setup | ## Advanced Docs diff --git a/docs/deployment.md b/docs/deployment.md index 337739726..fe70d6835 100644 --- a/docs/deployment.md +++ b/docs/deployment.md @@ -103,11 +103,7 @@ On macOS, run the gateway as a `launchd` user agent so it starts automatically a which nanobot # e.g. /Users/youruser/.local/bin/nanobot ``` -If you installed nanobot with `uv tool`, you may also want the Python path for `ProgramArguments`: - -```bash -which python -``` +Use this absolute `nanobot` path in `ProgramArguments` so the console script keeps the Python environment from your install method. **2. Create the LaunchAgent plist** at `~/Library/LaunchAgents/ai.nanobot.gateway.plist` (replace paths if needed): @@ -121,7 +117,6 @@ which python ProgramArguments - /Users/youruser/.local/share/uv/tools/nanobot-ai/bin/python /Users/youruser/.local/bin/nanobot gateway --workspace From 830211b5d4111a1a94ee73612e2841ece529ca7c Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Sat, 25 Apr 2026 11:25:57 +0000 Subject: [PATCH 03/52] docs: simplify macOS launchd setup Made-with: Cursor --- docs/deployment.md | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/docs/deployment.md b/docs/deployment.md index fe70d6835..f22b68857 100644 --- a/docs/deployment.md +++ b/docs/deployment.md @@ -95,17 +95,17 @@ If you edit the `.service` file itself, run `systemctl --user daemon-reload` bef ## macOS LaunchAgent -On macOS, run the gateway as a `launchd` user agent so it starts automatically after login and restarts if it exits unexpectedly. +Use a LaunchAgent when you want `nanobot gateway` to stay online after you log in, without keeping a terminal open. -**1. Find the nanobot binary path:** +**1. Get the absolute `nanobot` path:** ```bash which nanobot # e.g. /Users/youruser/.local/bin/nanobot ``` -Use this absolute `nanobot` path in `ProgramArguments` so the console script keeps the Python environment from your install method. +Use that exact path in the plist. It keeps the Python environment from your install method. -**2. Create the LaunchAgent plist** at `~/Library/LaunchAgents/ai.nanobot.gateway.plist` (replace paths if needed): +**2. Create `~/Library/LaunchAgents/ai.nanobot.gateway.plist`:** ```xml @@ -140,14 +140,6 @@ Use this absolute `nanobot` path in `ProgramArguments` so the console script kee StandardErrorPath /Users/youruser/.nanobot/logs/gateway.error.log - - EnvironmentVariables - - PATH - /Users/youruser/.local/bin:/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin - PYTHONUNBUFFERED - 1 - ``` @@ -155,7 +147,7 @@ Use this absolute `nanobot` path in `ProgramArguments` so the console script kee **3. Load and start it:** ```bash -mkdir -p ~/.nanobot/logs +mkdir -p ~/Library/LaunchAgents ~/.nanobot/logs launchctl bootstrap gui/$(id -u) ~/Library/LaunchAgents/ai.nanobot.gateway.plist launchctl enable gui/$(id -u)/ai.nanobot.gateway launchctl kickstart -k gui/$(id -u)/ai.nanobot.gateway @@ -165,11 +157,10 @@ launchctl kickstart -k gui/$(id -u)/ai.nanobot.gateway ```bash launchctl list | grep ai.nanobot.gateway -launchctl kickstart -k gui/$(id -u)/ai.nanobot.gateway +launchctl kickstart -k gui/$(id -u)/ai.nanobot.gateway # restart launchctl bootout gui/$(id -u) ~/Library/LaunchAgents/ai.nanobot.gateway.plist -log stream --process nanobot ``` -If you edit the plist itself, run `launchctl bootout ...` and `launchctl bootstrap ...` again so `launchd` reloads the updated definition. +After editing the plist, run `launchctl bootout ...` and `launchctl bootstrap ...` again. -> **Note:** if `launchctl kickstart` fails with an "address already in use" error, you probably still have a manually started `nanobot gateway` process running on the same port. Stop the manual process first, then kickstart the LaunchAgent again. +> **Note:** if startup fails with "address already in use", stop the manually started `nanobot gateway` process first. From cfc76ffbbffd0cb2b80b17e01373996001f50dde Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Sat, 25 Apr 2026 12:34:29 +0000 Subject: [PATCH 04/52] feat(agent): add ask_user tool Made-with: Cursor --- nanobot/agent/loop.py | 95 ++++++++++++++++++--- nanobot/agent/runner.py | 33 ++++++-- nanobot/agent/tools/ask.py | 50 +++++++++++ tests/agent/test_ask_user.py | 158 +++++++++++++++++++++++++++++++++++ 4 files changed, 320 insertions(+), 16 deletions(-) create mode 100644 nanobot/agent/tools/ask.py create mode 100644 tests/agent/test_ask_user.py diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index ca80475a7..637bb5126 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -20,14 +20,15 @@ from nanobot.agent.memory import Consolidator, Dream from nanobot.agent.runner import _MAX_INJECTIONS_PER_TURN, AgentRunner, AgentRunSpec from nanobot.agent.skills import BUILTIN_SKILLS_DIR from nanobot.agent.subagent import SubagentManager +from nanobot.agent.tools.ask import AskUserTool from nanobot.agent.tools.cron import CronTool from nanobot.agent.tools.filesystem import EditFileTool, ListDirTool, ReadFileTool, WriteFileTool from nanobot.agent.tools.message import MessageTool from nanobot.agent.tools.notebook import NotebookEditTool from nanobot.agent.tools.registry import ToolRegistry from nanobot.agent.tools.search import GlobTool, GrepTool -from nanobot.agent.tools.shell import ExecTool from nanobot.agent.tools.self import MyTool +from nanobot.agent.tools.shell import ExecTool from nanobot.agent.tools.spawn import SpawnTool from nanobot.agent.tools.web import WebFetchTool, WebSearchTool from nanobot.bus.events import InboundMessage, OutboundMessage @@ -287,6 +288,7 @@ class AgentLoop: self.workspace if (self.restrict_to_workspace or self.exec_config.sandbox) else None ) extra_read = [BUILTIN_SKILLS_DIR] if allowed_dir else None + self.tools.register(AskUserTool()) self.tools.register( ReadFileTool( workspace=self.workspace, allowed_dir=allowed_dir, extra_allowed_dirs=extra_read @@ -407,6 +409,56 @@ class AgentLoop: return UNIFIED_SESSION_KEY return msg.session_key + @staticmethod + def _tool_call_name(tool_call: dict[str, Any]) -> str: + function = tool_call.get("function") + if isinstance(function, dict) and isinstance(function.get("name"), str): + return function["name"] + name = tool_call.get("name") + return name if isinstance(name, str) else "" + + @staticmethod + def _tool_call_arguments(tool_call: dict[str, Any]) -> dict[str, Any]: + function = tool_call.get("function") + raw = function.get("arguments") if isinstance(function, dict) else tool_call.get("arguments") + if isinstance(raw, dict): + return raw + if isinstance(raw, str): + try: + parsed = json.loads(raw) + except json.JSONDecodeError: + return {} + return parsed if isinstance(parsed, dict) else {} + return {} + + def _pending_ask_user_id(self, history: list[dict[str, Any]]) -> str | None: + pending: dict[str, str] = {} + for message in history: + if message.get("role") == "assistant": + for tool_call in message.get("tool_calls") or []: + if isinstance(tool_call, dict) and isinstance(tool_call.get("id"), str): + pending[tool_call["id"]] = self._tool_call_name(tool_call) + elif message.get("role") == "tool": + tool_call_id = message.get("tool_call_id") + if isinstance(tool_call_id, str): + pending.pop(tool_call_id, None) + for tool_call_id, name in reversed(pending.items()): + if name == "ask_user": + return tool_call_id + return None + + def _ask_user_options_from_messages(self, messages: list[dict[str, Any]]) -> list[str]: + for message in reversed(messages): + if message.get("role") != "assistant": + continue + for tool_call in reversed(message.get("tool_calls") or []): + if not isinstance(tool_call, dict) or self._tool_call_name(tool_call) != "ask_user": + continue + options = self._tool_call_arguments(tool_call).get("options") + if isinstance(options, list): + return [str(option) for option in options if isinstance(option, str)] + return [] + async def _run_agent_loop( self, initial_messages: list[dict], @@ -799,7 +851,7 @@ class AgentLoop: session_summary=pending, current_role=current_role, ) - final_content, _, all_msgs, _, _ = await self._run_agent_loop( + final_content, _, all_msgs, stop_reason, _ = await self._run_agent_loop( messages, session=session, channel=channel, chat_id=chat_id, message_id=msg.metadata.get("message_id"), pending_queue=pending_queue, @@ -808,10 +860,12 @@ class AgentLoop: self._clear_runtime_checkpoint(session) self.sessions.save(session) self._schedule_background(self.consolidator.maybe_consolidate_by_tokens(session)) + options = self._ask_user_options_from_messages(all_msgs) if stop_reason == "ask_user" else [] return OutboundMessage( channel=channel, chat_id=chat_id, content=final_content or "Background task completed.", + buttons=[options] if options else [], ) # Extract document text from media at the processing boundary so all @@ -850,14 +904,27 @@ class AgentLoop: history = session.get_history(max_messages=0) - initial_messages = self.context.build_messages( - history=history, - current_message=msg.content, - session_summary=pending, - media=msg.media if msg.media else None, - channel=msg.channel, - chat_id=msg.chat_id, - ) + pending_ask_id = self._pending_ask_user_id(history) + if pending_ask_id: + initial_messages = [ + {"role": "system", "content": self.context.build_system_prompt(channel=msg.channel)}, + *history, + { + "role": "tool", + "tool_call_id": pending_ask_id, + "name": "ask_user", + "content": msg.content, + }, + ] + else: + initial_messages = self.context.build_messages( + history=history, + current_message=msg.content, + session_summary=pending, + media=msg.media if msg.media else None, + channel=msg.channel, + chat_id=msg.chat_id, + ) async def _bus_progress( content: str, @@ -898,7 +965,7 @@ class AgentLoop: user_persisted_early = False media_paths = [p for p in (msg.media or []) if isinstance(p, str) and p] has_text = isinstance(msg.content, str) and msg.content.strip() - if has_text or media_paths: + if not pending_ask_id and (has_text or media_paths): extra: dict[str, Any] = {"media": list(media_paths)} if media_paths else {} text = msg.content if isinstance(msg.content, str) else "" session.add_message("user", text, **extra) @@ -944,6 +1011,11 @@ class AgentLoop: logger.info("Response to {}:{}: {}", msg.channel, msg.sender_id, preview) meta = dict(msg.metadata or {}) + buttons: list[list[str]] = [] + if stop_reason == "ask_user": + options = self._ask_user_options_from_messages(all_msgs) + if options: + buttons = [options] if on_stream is not None and stop_reason != "error": meta["_streamed"] = True return OutboundMessage( @@ -951,6 +1023,7 @@ class AgentLoop: chat_id=msg.chat_id, content=final_content, metadata=meta, + buttons=buttons, ) def _sanitize_persisted_blocks( diff --git a/nanobot/agent/runner.py b/nanobot/agent/runner.py index 3704f3030..688d38714 100644 --- a/nanobot/agent/runner.py +++ b/nanobot/agent/runner.py @@ -3,16 +3,16 @@ from __future__ import annotations import asyncio -from dataclasses import dataclass, field import inspect import os +from dataclasses import dataclass, field from pathlib import Path from typing import Any from loguru import logger from nanobot.agent.hook import AgentHook, AgentHookContext -from nanobot.utils.prompt_templates import render_template +from nanobot.agent.tools.ask import AskUserInterrupt from nanobot.agent.tools.registry import ToolRegistry from nanobot.providers.base import LLMProvider, LLMResponse, ToolCallRequest from nanobot.utils.helpers import ( @@ -23,6 +23,7 @@ from nanobot.utils.helpers import ( maybe_persist_tool_result, truncate_text, ) +from nanobot.utils.prompt_templates import render_template from nanobot.utils.runtime import ( EMPTY_FINAL_RESPONSE_MESSAGE, build_finalization_retry_message, @@ -312,6 +313,8 @@ class AgentRunner: context.tool_events = list(new_events) completed_tool_results: list[dict[str, Any]] = [] for tool_call, result in zip(response.tool_calls, results): + if isinstance(fatal_error, AskUserInterrupt) and tool_call.name == "ask_user": + continue tool_message = { "role": "tool", "tool_call_id": tool_call.id, @@ -326,6 +329,15 @@ class AgentRunner: messages.append(tool_message) completed_tool_results.append(tool_message) if fatal_error is not None: + if isinstance(fatal_error, AskUserInterrupt): + final_content = fatal_error.question + stop_reason = "ask_user" + context.final_content = final_content + context.stop_reason = stop_reason + if hook.wants_streaming(): + await hook.on_stream_end(context, resuming=False) + await hook.after_iteration(context) + break error = f"Error: {type(fatal_error).__name__}: {fatal_error}" final_content = error stop_reason = "tool_error" @@ -656,13 +668,21 @@ class AgentRunner: tool_results: list[tuple[Any, dict[str, str], BaseException | None]] = [] for batch in batches: if spec.concurrent_tools and len(batch) > 1: - tool_results.extend(await asyncio.gather(*( + batch_results = await asyncio.gather(*( self._run_tool(spec, tool_call, external_lookup_counts) for tool_call in batch - ))) + )) + tool_results.extend(batch_results) else: + batch_results = [] for tool_call in batch: - tool_results.append(await self._run_tool(spec, tool_call, external_lookup_counts)) + result = await self._run_tool(spec, tool_call, external_lookup_counts) + tool_results.append(result) + batch_results.append(result) + if isinstance(result[2], AskUserInterrupt): + break + if any(isinstance(error, AskUserInterrupt) for _, _, error in batch_results): + break results: list[Any] = [] events: list[dict[str, str]] = [] @@ -724,6 +744,9 @@ class AgentRunner: "status": "error", "detail": str(exc), } + if isinstance(exc, AskUserInterrupt): + event["status"] = "waiting" + return "", event, exc if spec.fail_on_tool_error: return f"Error: {type(exc).__name__}: {exc}", event, exc return f"Error: {type(exc).__name__}: {exc}", event, None diff --git a/nanobot/agent/tools/ask.py b/nanobot/agent/tools/ask.py new file mode 100644 index 000000000..0ce371ea8 --- /dev/null +++ b/nanobot/agent/tools/ask.py @@ -0,0 +1,50 @@ +"""Tool for pausing a turn until the user answers.""" + +from typing import Any + +from nanobot.agent.tools.base import Tool, tool_parameters +from nanobot.agent.tools.schema import ArraySchema, StringSchema, tool_parameters_schema + + +class AskUserInterrupt(BaseException): + """Internal signal: the runner should stop and wait for user input.""" + + def __init__(self, question: str, options: list[str] | None = None) -> None: + self.question = question + self.options = [str(option) for option in (options or []) if str(option)] + super().__init__(question) + + +@tool_parameters( + tool_parameters_schema( + question=StringSchema( + "The question to ask before continuing. Use this only when the task needs the user's answer." + ), + options=ArraySchema( + StringSchema("A possible answer label"), + description="Optional choices. The user may still reply with free text.", + ), + required=["question"], + ) +) +class AskUserTool(Tool): + """Ask the user a blocking question.""" + + @property + def name(self) -> str: + return "ask_user" + + @property + def description(self) -> str: + return ( + "Pause and ask the user a question when their answer is required to continue. " + "Use options for likely answers; the user's reply, typed or selected, is returned as the tool result. " + "For non-blocking notifications or buttons, use the message tool instead." + ) + + @property + def exclusive(self) -> bool: + return True + + async def execute(self, question: str, options: list[str] | None = None, **_: Any) -> Any: + raise AskUserInterrupt(question=question, options=options) diff --git a/tests/agent/test_ask_user.py b/tests/agent/test_ask_user.py new file mode 100644 index 000000000..fd8993ceb --- /dev/null +++ b/tests/agent/test_ask_user.py @@ -0,0 +1,158 @@ +import asyncio +from unittest.mock import MagicMock + +import pytest + +from nanobot.agent.loop import AgentLoop +from nanobot.agent.runner import AgentRunner, AgentRunSpec +from nanobot.agent.tools.ask import AskUserInterrupt, AskUserTool +from nanobot.agent.tools.base import Tool, tool_parameters +from nanobot.agent.tools.registry import ToolRegistry +from nanobot.agent.tools.schema import tool_parameters_schema +from nanobot.bus.events import InboundMessage +from nanobot.bus.queue import MessageBus +from nanobot.providers.base import GenerationSettings, LLMResponse, ToolCallRequest + + +def _make_provider(chat_with_retry): + provider = MagicMock() + provider.get_default_model.return_value = "test-model" + provider.generation = GenerationSettings() + provider.chat_with_retry = chat_with_retry + return provider + + +def test_ask_user_tool_schema_and_interrupt(): + tool = AskUserTool() + schema = tool.to_schema()["function"] + + assert schema["name"] == "ask_user" + assert "question" in schema["parameters"]["required"] + assert schema["parameters"]["properties"]["options"]["type"] == "array" + + with pytest.raises(AskUserInterrupt) as exc: + asyncio.run(tool.execute("Continue?", options=["Yes", "No"])) + + assert exc.value.question == "Continue?" + assert exc.value.options == ["Yes", "No"] + + +@pytest.mark.asyncio +async def test_runner_pauses_on_ask_user_without_executing_later_tools(): + @tool_parameters(tool_parameters_schema(required=[])) + class LaterTool(Tool): + called = False + + @property + def name(self) -> str: + return "later" + + @property + def description(self) -> str: + return "Should not run after ask_user pauses the turn." + + async def execute(self, **kwargs): + self.called = True + return "later result" + + async def chat_with_retry(**kwargs): + return LLMResponse( + content="", + finish_reason="tool_calls", + tool_calls=[ + ToolCallRequest( + id="call_ask", + name="ask_user", + arguments={"question": "Install this package?", "options": ["Yes", "No"]}, + ), + ToolCallRequest(id="call_later", name="later", arguments={}), + ], + ) + + later = LaterTool() + tools = ToolRegistry() + tools.register(AskUserTool()) + tools.register(later) + + result = await AgentRunner(_make_provider(chat_with_retry)).run(AgentRunSpec( + initial_messages=[{"role": "user", "content": "continue"}], + tools=tools, + model="test-model", + max_iterations=3, + max_tool_result_chars=16_000, + concurrent_tools=True, + )) + + assert result.stop_reason == "ask_user" + assert result.final_content == "Install this package?" + assert "ask_user" in result.tools_used + assert later.called is False + assert result.messages[-1]["role"] == "assistant" + assert result.messages[-1]["tool_calls"][0]["function"]["name"] == "ask_user" + assert not any(message.get("name") == "ask_user" for message in result.messages) + + +@pytest.mark.asyncio +async def test_ask_user_sends_buttons_and_resumes_with_next_message(tmp_path): + seen_messages: list[list[dict]] = [] + + async def chat_with_retry(**kwargs): + seen_messages.append(kwargs["messages"]) + if len(seen_messages) == 1: + return LLMResponse( + content="", + finish_reason="tool_calls", + tool_calls=[ + ToolCallRequest( + id="call_ask", + name="ask_user", + arguments={ + "question": "Install the optional package?", + "options": ["Install", "Skip"], + }, + ) + ], + ) + return LLMResponse(content="Skipped install.", usage={}) + + loop = AgentLoop( + bus=MessageBus(), + provider=_make_provider(chat_with_retry), + workspace=tmp_path, + model="test-model", + ) + + first = await loop._process_message( + InboundMessage(channel="cli", sender_id="user", chat_id="direct", content="set it up") + ) + + assert first is not None + assert first.content == "Install the optional package?" + assert first.buttons == [["Install", "Skip"]] + + session = loop.sessions.get_or_create("cli:direct") + assert any(message.get("role") == "assistant" and message.get("tool_calls") for message in session.messages) + assert not any(message.get("role") == "tool" and message.get("name") == "ask_user" for message in session.messages) + + second = await loop._process_message( + InboundMessage(channel="cli", sender_id="user", chat_id="direct", content="Skip") + ) + + assert second is not None + assert second.content == "Skipped install." + assert any( + message.get("role") == "tool" + and message.get("name") == "ask_user" + and message.get("content") == "Skip" + for message in seen_messages[-1] + ) + assert not any( + message.get("role") == "user" and message.get("content") == "Skip" + for message in session.messages + ) + assert any( + message.get("role") == "tool" + and message.get("name") == "ask_user" + and message.get("content") == "Skip" + for message in session.messages + ) From 3b1ea99ee10574109ad439bfb9f29edfc8e76c01 Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Sat, 25 Apr 2026 12:42:09 +0000 Subject: [PATCH 05/52] fix(agent): render ask_user options without buttons Made-with: Cursor --- nanobot/agent/loop.py | 33 ++++++++++++++++++++++------- tests/agent/test_ask_user.py | 40 +++++++++++++++++++++++++++++++++--- 2 files changed, 63 insertions(+), 10 deletions(-) diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index 637bb5126..d87ad1a80 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -54,6 +54,7 @@ if TYPE_CHECKING: UNIFIED_SESSION_KEY = "unified:default" +BUTTON_CHANNELS = frozenset({"telegram"}) class _LoopHook(AgentHook): @@ -459,6 +460,19 @@ class AgentLoop: return [str(option) for option in options if isinstance(option, str)] return [] + @staticmethod + def _ask_user_outbound( + content: str | None, + options: list[str], + channel: str, + ) -> tuple[str | None, list[list[str]]]: + if not options: + return content, [] + if channel in BUTTON_CHANNELS: + return content, [options] + option_text = "\n".join(f"{index}. {option}" for index, option in enumerate(options, 1)) + return f"{content}\n\n{option_text}" if content else option_text, [] + async def _run_agent_loop( self, initial_messages: list[dict], @@ -861,11 +875,16 @@ class AgentLoop: self.sessions.save(session) self._schedule_background(self.consolidator.maybe_consolidate_by_tokens(session)) options = self._ask_user_options_from_messages(all_msgs) if stop_reason == "ask_user" else [] + content, buttons = self._ask_user_outbound( + final_content or "Background task completed.", + options, + channel, + ) return OutboundMessage( channel=channel, chat_id=chat_id, - content=final_content or "Background task completed.", - buttons=[options] if options else [], + content=content, + buttons=buttons, ) # Extract document text from media at the processing boundary so all @@ -1011,11 +1030,11 @@ class AgentLoop: logger.info("Response to {}:{}: {}", msg.channel, msg.sender_id, preview) meta = dict(msg.metadata or {}) - buttons: list[list[str]] = [] - if stop_reason == "ask_user": - options = self._ask_user_options_from_messages(all_msgs) - if options: - buttons = [options] + final_content, buttons = self._ask_user_outbound( + final_content, + self._ask_user_options_from_messages(all_msgs) if stop_reason == "ask_user" else [], + msg.channel, + ) if on_stream is not None and stop_reason != "error": meta["_streamed"] = True return OutboundMessage( diff --git a/tests/agent/test_ask_user.py b/tests/agent/test_ask_user.py index fd8993ceb..bdf49663a 100644 --- a/tests/agent/test_ask_user.py +++ b/tests/agent/test_ask_user.py @@ -93,7 +93,7 @@ async def test_runner_pauses_on_ask_user_without_executing_later_tools(): @pytest.mark.asyncio -async def test_ask_user_sends_buttons_and_resumes_with_next_message(tmp_path): +async def test_ask_user_text_fallback_resumes_with_next_message(tmp_path): seen_messages: list[list[dict]] = [] async def chat_with_retry(**kwargs): @@ -127,8 +127,8 @@ async def test_ask_user_sends_buttons_and_resumes_with_next_message(tmp_path): ) assert first is not None - assert first.content == "Install the optional package?" - assert first.buttons == [["Install", "Skip"]] + assert first.content == "Install the optional package?\n\n1. Install\n2. Skip" + assert first.buttons == [] session = loop.sessions.get_or_create("cli:direct") assert any(message.get("role") == "assistant" and message.get("tool_calls") for message in session.messages) @@ -156,3 +156,37 @@ async def test_ask_user_sends_buttons_and_resumes_with_next_message(tmp_path): and message.get("content") == "Skip" for message in session.messages ) + + +@pytest.mark.asyncio +async def test_ask_user_keeps_buttons_for_telegram(tmp_path): + async def chat_with_retry(**kwargs): + return LLMResponse( + content="", + finish_reason="tool_calls", + tool_calls=[ + ToolCallRequest( + id="call_ask", + name="ask_user", + arguments={ + "question": "Install the optional package?", + "options": ["Install", "Skip"], + }, + ) + ], + ) + + loop = AgentLoop( + bus=MessageBus(), + provider=_make_provider(chat_with_retry), + workspace=tmp_path, + model="test-model", + ) + + response = await loop._process_message( + InboundMessage(channel="telegram", sender_id="user", chat_id="123", content="set it up") + ) + + assert response is not None + assert response.content == "Install the optional package?" + assert response.buttons == [["Install", "Skip"]] From 403ce23d22c59fe31a89cc3b4ed8db31d8cf6f17 Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Sat, 25 Apr 2026 14:06:09 +0000 Subject: [PATCH 06/52] fix(agent): tighten ask_user CLI handling Made-with: Cursor --- nanobot/agent/loop.py | 100 +++++++---------------------------- nanobot/agent/runner.py | 27 ++++++---- nanobot/agent/tools/ask.py | 86 ++++++++++++++++++++++++++++++ nanobot/cli/commands.py | 4 ++ tests/agent/test_ask_user.py | 19 ++++++- 5 files changed, 142 insertions(+), 94 deletions(-) diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index d87ad1a80..5a4480041 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -20,7 +20,13 @@ from nanobot.agent.memory import Consolidator, Dream from nanobot.agent.runner import _MAX_INJECTIONS_PER_TURN, AgentRunner, AgentRunSpec from nanobot.agent.skills import BUILTIN_SKILLS_DIR from nanobot.agent.subagent import SubagentManager -from nanobot.agent.tools.ask import AskUserTool +from nanobot.agent.tools.ask import ( + AskUserTool, + ask_user_options_from_messages, + ask_user_outbound, + ask_user_tool_result_messages, + pending_ask_user_id, +) from nanobot.agent.tools.cron import CronTool from nanobot.agent.tools.filesystem import EditFileTool, ListDirTool, ReadFileTool, WriteFileTool from nanobot.agent.tools.message import MessageTool @@ -54,7 +60,6 @@ if TYPE_CHECKING: UNIFIED_SESSION_KEY = "unified:default" -BUTTON_CHANNELS = frozenset({"telegram"}) class _LoopHook(AgentHook): @@ -410,69 +415,6 @@ class AgentLoop: return UNIFIED_SESSION_KEY return msg.session_key - @staticmethod - def _tool_call_name(tool_call: dict[str, Any]) -> str: - function = tool_call.get("function") - if isinstance(function, dict) and isinstance(function.get("name"), str): - return function["name"] - name = tool_call.get("name") - return name if isinstance(name, str) else "" - - @staticmethod - def _tool_call_arguments(tool_call: dict[str, Any]) -> dict[str, Any]: - function = tool_call.get("function") - raw = function.get("arguments") if isinstance(function, dict) else tool_call.get("arguments") - if isinstance(raw, dict): - return raw - if isinstance(raw, str): - try: - parsed = json.loads(raw) - except json.JSONDecodeError: - return {} - return parsed if isinstance(parsed, dict) else {} - return {} - - def _pending_ask_user_id(self, history: list[dict[str, Any]]) -> str | None: - pending: dict[str, str] = {} - for message in history: - if message.get("role") == "assistant": - for tool_call in message.get("tool_calls") or []: - if isinstance(tool_call, dict) and isinstance(tool_call.get("id"), str): - pending[tool_call["id"]] = self._tool_call_name(tool_call) - elif message.get("role") == "tool": - tool_call_id = message.get("tool_call_id") - if isinstance(tool_call_id, str): - pending.pop(tool_call_id, None) - for tool_call_id, name in reversed(pending.items()): - if name == "ask_user": - return tool_call_id - return None - - def _ask_user_options_from_messages(self, messages: list[dict[str, Any]]) -> list[str]: - for message in reversed(messages): - if message.get("role") != "assistant": - continue - for tool_call in reversed(message.get("tool_calls") or []): - if not isinstance(tool_call, dict) or self._tool_call_name(tool_call) != "ask_user": - continue - options = self._tool_call_arguments(tool_call).get("options") - if isinstance(options, list): - return [str(option) for option in options if isinstance(option, str)] - return [] - - @staticmethod - def _ask_user_outbound( - content: str | None, - options: list[str], - channel: str, - ) -> tuple[str | None, list[list[str]]]: - if not options: - return content, [] - if channel in BUTTON_CHANNELS: - return content, [options] - option_text = "\n".join(f"{index}. {option}" for index, option in enumerate(options, 1)) - return f"{content}\n\n{option_text}" if content else option_text, [] - async def _run_agent_loop( self, initial_messages: list[dict], @@ -874,8 +816,8 @@ class AgentLoop: self._clear_runtime_checkpoint(session) self.sessions.save(session) self._schedule_background(self.consolidator.maybe_consolidate_by_tokens(session)) - options = self._ask_user_options_from_messages(all_msgs) if stop_reason == "ask_user" else [] - content, buttons = self._ask_user_outbound( + options = ask_user_options_from_messages(all_msgs) if stop_reason == "ask_user" else [] + content, buttons = ask_user_outbound( final_content or "Background task completed.", options, channel, @@ -923,18 +865,14 @@ class AgentLoop: history = session.get_history(max_messages=0) - pending_ask_id = self._pending_ask_user_id(history) + pending_ask_id = pending_ask_user_id(history) if pending_ask_id: - initial_messages = [ - {"role": "system", "content": self.context.build_system_prompt(channel=msg.channel)}, - *history, - { - "role": "tool", - "tool_call_id": pending_ask_id, - "name": "ask_user", - "content": msg.content, - }, - ] + initial_messages = ask_user_tool_result_messages( + self.context.build_system_prompt(channel=msg.channel), + history, + pending_ask_id, + msg.content, + ) else: initial_messages = self.context.build_messages( history=history, @@ -1030,12 +968,12 @@ class AgentLoop: logger.info("Response to {}:{}: {}", msg.channel, msg.sender_id, preview) meta = dict(msg.metadata or {}) - final_content, buttons = self._ask_user_outbound( + final_content, buttons = ask_user_outbound( final_content, - self._ask_user_options_from_messages(all_msgs) if stop_reason == "ask_user" else [], + ask_user_options_from_messages(all_msgs) if stop_reason == "ask_user" else [], msg.channel, ) - if on_stream is not None and stop_reason != "error": + if on_stream is not None and stop_reason not in {"ask_user", "error"}: meta["_streamed"] = True return OutboundMessage( channel=msg.channel, diff --git a/nanobot/agent/runner.py b/nanobot/agent/runner.py index 688d38714..be71f6498 100644 --- a/nanobot/agent/runner.py +++ b/nanobot/agent/runner.py @@ -278,17 +278,22 @@ class AgentRunner: self._accumulate_usage(usage, raw_usage) if response.should_execute_tools: + tool_calls = list(response.tool_calls) + ask_index = next((i for i, tc in enumerate(tool_calls) if tc.name == "ask_user"), None) + if ask_index is not None: + tool_calls = tool_calls[: ask_index + 1] + context.tool_calls = list(tool_calls) if hook.wants_streaming(): await hook.on_stream_end(context, resuming=True) assistant_message = build_assistant_message( response.content or "", - tool_calls=[tc.to_openai_tool_call() for tc in response.tool_calls], + tool_calls=[tc.to_openai_tool_call() for tc in tool_calls], reasoning_content=response.reasoning_content, thinking_blocks=response.thinking_blocks, ) messages.append(assistant_message) - tools_used.extend(tc.name for tc in response.tool_calls) + tools_used.extend(tc.name for tc in tool_calls) await self._emit_checkpoint( spec, { @@ -297,7 +302,7 @@ class AgentRunner: "model": spec.model, "assistant_message": assistant_message, "completed_tool_results": [], - "pending_tool_calls": [tc.to_openai_tool_call() for tc in response.tool_calls], + "pending_tool_calls": [tc.to_openai_tool_call() for tc in tool_calls], }, ) @@ -305,14 +310,14 @@ class AgentRunner: results, new_events, fatal_error = await self._execute_tools( spec, - response.tool_calls, + tool_calls, external_lookup_counts, ) tool_events.extend(new_events) context.tool_results = list(results) context.tool_events = list(new_events) completed_tool_results: list[dict[str, Any]] = [] - for tool_call, result in zip(response.tool_calls, results): + for tool_call, result in zip(tool_calls, results): if isinstance(fatal_error, AskUserInterrupt) and tool_call.name == "ask_user": continue tool_message = { @@ -700,7 +705,7 @@ class AgentRunner: tool_call: ToolCallRequest, external_lookup_counts: dict[str, int], ) -> tuple[Any, dict[str, str], BaseException | None]: - _HINT = "\n\n[Analyze the error above and try a different approach.]" + hint = "\n\n[Analyze the error above and try a different approach.]" lookup_error = repeated_external_lookup_error( tool_call.name, tool_call.arguments, @@ -713,8 +718,8 @@ class AgentRunner: "detail": "repeated external lookup blocked", } if spec.fail_on_tool_error: - return lookup_error + _HINT, event, RuntimeError(lookup_error) - return lookup_error + _HINT, event, None + return lookup_error + hint, event, RuntimeError(lookup_error) + return lookup_error + hint, event, None prepare_call = getattr(spec.tools, "prepare_call", None) tool, params, prep_error = None, tool_call.arguments, None if callable(prepare_call): @@ -730,7 +735,7 @@ class AgentRunner: "status": "error", "detail": prep_error.split(": ", 1)[-1][:120], } - return prep_error + _HINT, event, RuntimeError(prep_error) if spec.fail_on_tool_error else None + return prep_error + hint, event, RuntimeError(prep_error) if spec.fail_on_tool_error else None try: if tool is not None: result = await tool.execute(**params) @@ -758,8 +763,8 @@ class AgentRunner: "detail": result.replace("\n", " ").strip()[:120], } if spec.fail_on_tool_error: - return result + _HINT, event, RuntimeError(result) - return result + _HINT, event, None + return result + hint, event, RuntimeError(result) + return result + hint, event, None detail = "" if result is None else str(result) detail = detail.replace("\n", " ").strip() diff --git a/nanobot/agent/tools/ask.py b/nanobot/agent/tools/ask.py index 0ce371ea8..c2aa8e0e8 100644 --- a/nanobot/agent/tools/ask.py +++ b/nanobot/agent/tools/ask.py @@ -1,10 +1,13 @@ """Tool for pausing a turn until the user answers.""" +import json from typing import Any from nanobot.agent.tools.base import Tool, tool_parameters from nanobot.agent.tools.schema import ArraySchema, StringSchema, tool_parameters_schema +BUTTON_CHANNELS = frozenset({"telegram"}) + class AskUserInterrupt(BaseException): """Internal signal: the runner should stop and wait for user input.""" @@ -48,3 +51,86 @@ class AskUserTool(Tool): async def execute(self, question: str, options: list[str] | None = None, **_: Any) -> Any: raise AskUserInterrupt(question=question, options=options) + + +def _tool_call_name(tool_call: dict[str, Any]) -> str: + function = tool_call.get("function") + if isinstance(function, dict) and isinstance(function.get("name"), str): + return function["name"] + name = tool_call.get("name") + return name if isinstance(name, str) else "" + + +def _tool_call_arguments(tool_call: dict[str, Any]) -> dict[str, Any]: + function = tool_call.get("function") + raw = function.get("arguments") if isinstance(function, dict) else tool_call.get("arguments") + if isinstance(raw, dict): + return raw + if isinstance(raw, str): + try: + parsed = json.loads(raw) + except json.JSONDecodeError: + return {} + return parsed if isinstance(parsed, dict) else {} + return {} + + +def pending_ask_user_id(history: list[dict[str, Any]]) -> str | None: + pending: dict[str, str] = {} + for message in history: + if message.get("role") == "assistant": + for tool_call in message.get("tool_calls") or []: + if isinstance(tool_call, dict) and isinstance(tool_call.get("id"), str): + pending[tool_call["id"]] = _tool_call_name(tool_call) + elif message.get("role") == "tool": + tool_call_id = message.get("tool_call_id") + if isinstance(tool_call_id, str): + pending.pop(tool_call_id, None) + for tool_call_id, name in reversed(pending.items()): + if name == "ask_user": + return tool_call_id + return None + + +def ask_user_tool_result_messages( + system_prompt: str, + history: list[dict[str, Any]], + tool_call_id: str, + content: str, +) -> list[dict[str, Any]]: + return [ + {"role": "system", "content": system_prompt}, + *history, + { + "role": "tool", + "tool_call_id": tool_call_id, + "name": "ask_user", + "content": content, + }, + ] + + +def ask_user_options_from_messages(messages: list[dict[str, Any]]) -> list[str]: + for message in reversed(messages): + if message.get("role") != "assistant": + continue + for tool_call in reversed(message.get("tool_calls") or []): + if not isinstance(tool_call, dict) or _tool_call_name(tool_call) != "ask_user": + continue + options = _tool_call_arguments(tool_call).get("options") + if isinstance(options, list): + return [str(option) for option in options if isinstance(option, str)] + return [] + + +def ask_user_outbound( + content: str | None, + options: list[str], + channel: str, +) -> tuple[str | None, list[list[str]]]: + if not options: + return content, [] + if channel in BUTTON_CHANNELS: + return content, [options] + option_text = "\n".join(f"{index}. {option}" for index, option in enumerate(options, 1)) + return f"{content}\n\n{option_text}" if content else option_text, [] diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index d5b17518d..c4cd2b1b4 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -212,12 +212,16 @@ async def _print_interactive_response( def _print_cli_progress_line(text: str, thinking: ThinkingSpinner | None) -> None: """Print a CLI progress line, pausing the spinner if needed.""" + if not text.strip(): + return with thinking.pause() if thinking else nullcontext(): console.print(f" [dim]↳ {text}[/dim]") async def _print_interactive_progress_line(text: str, thinking: ThinkingSpinner | None) -> None: """Print an interactive progress line, pausing the spinner if needed.""" + if not text.strip(): + return with thinking.pause() if thinking else nullcontext(): await _print_interactive_line(text) diff --git a/tests/agent/test_ask_user.py b/tests/agent/test_ask_user.py index bdf49663a..4d5b5be93 100644 --- a/tests/agent/test_ask_user.py +++ b/tests/agent/test_ask_user.py @@ -15,10 +15,15 @@ from nanobot.providers.base import GenerationSettings, LLMResponse, ToolCallRequ def _make_provider(chat_with_retry): + async def chat_stream_with_retry(**kwargs): + kwargs.pop("on_content_delta", None) + return await chat_with_retry(**kwargs) + provider = MagicMock() provider.get_default_model.return_value = "test-model" provider.generation = GenerationSettings() provider.chat_with_retry = chat_with_retry + provider.chat_stream_with_retry = chat_stream_with_retry return provider @@ -88,7 +93,8 @@ async def test_runner_pauses_on_ask_user_without_executing_later_tools(): assert "ask_user" in result.tools_used assert later.called is False assert result.messages[-1]["role"] == "assistant" - assert result.messages[-1]["tool_calls"][0]["function"]["name"] == "ask_user" + tool_calls = result.messages[-1]["tool_calls"] + assert [tool_call["function"]["name"] for tool_call in tool_calls] == ["ask_user"] assert not any(message.get("name") == "ask_user" for message in result.messages) @@ -122,13 +128,22 @@ async def test_ask_user_text_fallback_resumes_with_next_message(tmp_path): model="test-model", ) + async def on_stream(delta: str) -> None: + pass + + async def on_stream_end(**kwargs) -> None: + pass + first = await loop._process_message( - InboundMessage(channel="cli", sender_id="user", chat_id="direct", content="set it up") + InboundMessage(channel="cli", sender_id="user", chat_id="direct", content="set it up"), + on_stream=on_stream, + on_stream_end=on_stream_end, ) assert first is not None assert first.content == "Install the optional package?\n\n1. Install\n2. Skip" assert first.buttons == [] + assert "_streamed" not in first.metadata session = loop.sessions.get_or_create("cli:direct") assert any(message.get("role") == "assistant" and message.get("tool_calls") for message in session.messages) From a58d9fd357c778930869f50d2ca3e6dad95773c2 Mon Sep 17 00:00:00 2001 From: Xubin Ren Date: Sat, 25 Apr 2026 15:46:47 +0000 Subject: [PATCH 07/52] feat(webui): render ask_user choices Made-with: Cursor --- nanobot/agent/tools/ask.py | 4 +- nanobot/channels/websocket.py | 16 ++- tests/agent/test_ask_user.py | 34 ++++++ tests/channels/test_websocket_channel.py | 5 +- webui/src/components/thread/AskUserPrompt.tsx | 108 ++++++++++++++++++ webui/src/components/thread/ThreadShell.tsx | 23 ++++ webui/src/hooks/useNanobotStream.ts | 4 +- webui/src/lib/types.ts | 5 + webui/src/tests/thread-shell.test.tsx | 58 +++++++++- webui/src/tests/useNanobotStream.test.tsx | 23 ++++ 10 files changed, 274 insertions(+), 6 deletions(-) create mode 100644 webui/src/components/thread/AskUserPrompt.tsx diff --git a/nanobot/agent/tools/ask.py b/nanobot/agent/tools/ask.py index c2aa8e0e8..db8c83a84 100644 --- a/nanobot/agent/tools/ask.py +++ b/nanobot/agent/tools/ask.py @@ -6,7 +6,7 @@ from typing import Any from nanobot.agent.tools.base import Tool, tool_parameters from nanobot.agent.tools.schema import ArraySchema, StringSchema, tool_parameters_schema -BUTTON_CHANNELS = frozenset({"telegram"}) +STRUCTURED_BUTTON_CHANNELS = frozenset({"telegram", "websocket"}) class AskUserInterrupt(BaseException): @@ -130,7 +130,7 @@ def ask_user_outbound( ) -> tuple[str | None, list[list[str]]]: if not options: return content, [] - if channel in BUTTON_CHANNELS: + if channel in STRUCTURED_BUTTON_CHANNELS: return content, [options] option_text = "\n".join(f"{index}. {option}" for index, option in enumerate(options, 1)) return f"{content}\n\n{option_text}" if content else option_text, [] diff --git a/nanobot/channels/websocket.py b/nanobot/channels/websocket.py index c76371e98..ff923d810 100644 --- a/nanobot/channels/websocket.py +++ b/nanobot/channels/websocket.py @@ -54,6 +54,14 @@ def _normalize_config_path(path: str) -> str: return _strip_trailing_slash(path) +def _append_buttons_as_text(text: str, buttons: list[list[str]]) -> str: + labels = [label for row in buttons for label in row if label] + if not labels: + return text + fallback = "\n".join(f"{index}. {label}" for index, label in enumerate(labels, 1)) + return f"{text}\n\n{fallback}" if text else fallback + + class WebSocketConfig(Base): """WebSocket server channel configuration. @@ -1146,11 +1154,17 @@ class WebSocketChannel(BaseChannel): if not conns: logger.warning("websocket: no active subscribers for chat_id={}", msg.chat_id) return + text = msg.content + if msg.buttons: + text = _append_buttons_as_text(text, msg.buttons) payload: dict[str, Any] = { "event": "message", "chat_id": msg.chat_id, - "text": msg.content, + "text": text, } + if msg.buttons: + payload["buttons"] = msg.buttons + payload["button_prompt"] = msg.content if msg.media: payload["media"] = msg.media urls: list[dict[str, str]] = [] diff --git a/tests/agent/test_ask_user.py b/tests/agent/test_ask_user.py index 4d5b5be93..a192ee4a6 100644 --- a/tests/agent/test_ask_user.py +++ b/tests/agent/test_ask_user.py @@ -205,3 +205,37 @@ async def test_ask_user_keeps_buttons_for_telegram(tmp_path): assert response is not None assert response.content == "Install the optional package?" assert response.buttons == [["Install", "Skip"]] + + +@pytest.mark.asyncio +async def test_ask_user_keeps_buttons_for_websocket(tmp_path): + async def chat_with_retry(**kwargs): + return LLMResponse( + content="", + finish_reason="tool_calls", + tool_calls=[ + ToolCallRequest( + id="call_ask", + name="ask_user", + arguments={ + "question": "Install the optional package?", + "options": ["Install", "Skip"], + }, + ) + ], + ) + + loop = AgentLoop( + bus=MessageBus(), + provider=_make_provider(chat_with_retry), + workspace=tmp_path, + model="test-model", + ) + + response = await loop._process_message( + InboundMessage(channel="websocket", sender_id="user", chat_id="123", content="set it up") + ) + + assert response is not None + assert response.content == "Install the optional package?" + assert response.buttons == [["Install", "Skip"]] diff --git a/tests/channels/test_websocket_channel.py b/tests/channels/test_websocket_channel.py index c92c88ba8..a1d459b94 100644 --- a/tests/channels/test_websocket_channel.py +++ b/tests/channels/test_websocket_channel.py @@ -178,6 +178,7 @@ async def test_send_delivers_json_message_with_media_and_reply() -> None: content="hello", reply_to="m1", media=["/tmp/a.png"], + buttons=[["Yes", "No"]], ) await channel.send(msg) @@ -185,9 +186,11 @@ async def test_send_delivers_json_message_with_media_and_reply() -> None: payload = json.loads(mock_ws.send.call_args[0][0]) assert payload["event"] == "message" assert payload["chat_id"] == "chat-1" - assert payload["text"] == "hello" + assert payload["text"] == "hello\n\n1. Yes\n2. No" + assert payload["button_prompt"] == "hello" assert payload["reply_to"] == "m1" assert payload["media"] == ["/tmp/a.png"] + assert payload["buttons"] == [["Yes", "No"]] @pytest.mark.asyncio diff --git a/webui/src/components/thread/AskUserPrompt.tsx b/webui/src/components/thread/AskUserPrompt.tsx new file mode 100644 index 000000000..3ab20f5e8 --- /dev/null +++ b/webui/src/components/thread/AskUserPrompt.tsx @@ -0,0 +1,108 @@ +import { useCallback, useEffect, useRef, useState } from "react"; +import { MessageSquareText } from "lucide-react"; + +import { Button } from "@/components/ui/button"; +import { cn } from "@/lib/utils"; + +interface AskUserPromptProps { + question: string; + buttons: string[][]; + onAnswer: (answer: string) => void; +} + +export function AskUserPrompt({ + question, + buttons, + onAnswer, +}: AskUserPromptProps) { + const [customOpen, setCustomOpen] = useState(false); + const [custom, setCustom] = useState(""); + const inputRef = useRef(null); + const options = buttons.flat().filter(Boolean); + + useEffect(() => { + if (customOpen) { + inputRef.current?.focus(); + } + }, [customOpen]); + + const submitCustom = useCallback(() => { + const answer = custom.trim(); + if (!answer) return; + onAnswer(answer); + setCustom(""); + setCustomOpen(false); + }, [custom, onAnswer]); + + if (options.length === 0) return null; + + return ( +
+
+
+ +
+

+ {question} +

+
+ +
+ {options.map((option) => ( + + ))} + +
+ + {customOpen ? ( +
+