diff --git a/nanobot/agent/tools/message.py b/nanobot/agent/tools/message.py index 62730dd6b..8517bb55c 100644 --- a/nanobot/agent/tools/message.py +++ b/nanobot/agent/tools/message.py @@ -13,12 +13,24 @@ from nanobot.config.paths import get_workspace_path @tool_parameters( tool_parameters_schema( - content=StringSchema("The message content to send"), - channel=StringSchema("Optional: target channel (telegram, discord, etc.)"), - chat_id=StringSchema("Optional: target chat/user ID"), + content=StringSchema( + "Message content for proactive or cross-channel delivery. " + "Do not use this for a normal reply in the current chat." + ), + channel=StringSchema( + "Optional target channel for cross-channel/proactive delivery. " + "Do not set this to the current runtime channel for a normal reply." + ), + chat_id=StringSchema( + "Optional target chat/user ID for cross-channel/proactive delivery. " + "Do not set this to the current runtime chat for a normal reply." + ), media=ArraySchema( StringSchema(""), - description="Optional: list of file paths to attach (images, video, audio, documents)", + description=( + "Optional list of existing file paths to attach for proactive or cross-channel delivery. " + "Do not use this to resend generate_image outputs in the current chat." + ), ), buttons=ArraySchema( ArraySchema(StringSchema("Button label")), @@ -100,9 +112,14 @@ class MessageTool(Tool): @property def description(self) -> str: return ( - "Send a message to the user, optionally with file attachments. " - "This is the ONLY way to deliver files (images, documents, audio, video) to the user. " - "Use the 'media' parameter with file paths to attach files. " + "Proactively send a message to a user/channel, optionally with file attachments. " + "Use this for reminders, cross-channel delivery, or explicit proactive sends. " + "Do not use this for the normal reply in the current chat: answer naturally instead. " + "If channel/chat_id would target the current runtime conversation, do not call this tool " + "unless the user explicitly asked you to proactively send an existing file attachment. " + "When generate_image creates images in the current chat, the final assistant reply " + "automatically attaches them; do not call message just to announce or resend them. " + "For proactive attachment delivery, use the 'media' parameter with file paths. " "Do NOT use read_file to send files — that only reads content for your own analysis." ) diff --git a/nanobot/session/manager.py b/nanobot/session/manager.py index 859d2cca8..7aea7b63d 100644 --- a/nanobot/session/manager.py +++ b/nanobot/session/manager.py @@ -2,6 +2,7 @@ import json import os +import re import shutil from contextlib import suppress from dataclasses import dataclass, field @@ -21,6 +22,25 @@ from nanobot.utils.helpers import ( ) FILE_MAX_MESSAGES = 2000 +_MESSAGE_TIME_PREFIX_RE = re.compile(r"^\[Message Time: [^\]]+\]\n?") +_LOCAL_IMAGE_BREADCRUMB_RE = re.compile(r"^\[image: (?:/|~)[^\]]+\]\s*$") +_TOOL_CALL_ECHO_RE = re.compile(r'^\s*(?:generate_image|message)\([^)]*\)\s*$') + + +def _sanitize_assistant_replay_text(content: str) -> str: + """Remove internal replay artifacts that the model may have copied before. + + These strings are useful as runtime/session metadata, but when they appear + in assistant examples they become demonstrations for the model to repeat. + """ + content = _MESSAGE_TIME_PREFIX_RE.sub("", content, count=1) + lines = [ + line + for line in content.splitlines() + if not _LOCAL_IMAGE_BREADCRUMB_RE.match(line) + and not _TOOL_CALL_ECHO_RE.match(line) + ] + return "\n".join(lines).strip() @dataclass @@ -41,22 +61,15 @@ class Session: Annotating *every* assistant turn trains the model (via in-context demonstrations) to start its own replies with the same ``[Message Time: ...]`` prefix, which leaks metadata back to the user. - We therefore only annotate: - - * ``user`` turns — needed so the model can pin the conversation in time. - * proactive deliveries (``_channel_delivery=True``) — cron / heartbeat - assistant pushes that may sit hours away from the next user reply, - and are too infrequent to act as parroting demonstrations. + We therefore only annotate user turns. User-side stamps are enough to + pin adjacent assistant replies for relative-time reasoning, including + proactive messages the user replies to later. """ timestamp = message.get("timestamp") if not timestamp or not isinstance(content, str): return content role = message.get("role") - if role == "user": - pass - elif role == "assistant" and message.get("_channel_delivery"): - pass - else: + if role != "user": return content return f"[Message Time: {timestamp}]\n{content}" @@ -105,19 +118,25 @@ class Session: out: list[dict[str, Any]] = [] for message in sliced: content = message.get("content", "") + role = message.get("role") + if role == "assistant" and isinstance(content, str): + content = _sanitize_assistant_replay_text(content) # Synthesize an ``[image: path]`` breadcrumb from the persisted # ``media`` kwarg so LLM replay still sees *something* where the # image used to be. Without this, an image-only user turn # replays as an empty user message — the assistant's reply then # looks like it's responding to nothing. media = message.get("media") - if isinstance(media, list) and media and isinstance(content, str): + if role == "user" and isinstance(media, list) and media and isinstance(content, str): breadcrumbs = "\n".join( image_placeholder_text(p) for p in media if isinstance(p, str) and p ) content = f"{content}\n{breadcrumbs}" if content else breadcrumbs if include_timestamps: content = self._annotate_message_time(message, content) + if role == "assistant" and isinstance(content, str) and not content.strip(): + if not any(key in message for key in ("tool_calls", "reasoning_content", "thinking_blocks")): + continue entry: dict[str, Any] = {"role": message["role"], "content": content} for key in ("tool_calls", "tool_call_id", "name", "reasoning_content", "thinking_blocks"): if key in message: diff --git a/nanobot/skills/image-generation/SKILL.md b/nanobot/skills/image-generation/SKILL.md index dc89372ee..3ba0e2f45 100644 --- a/nanobot/skills/image-generation/SKILL.md +++ b/nanobot/skills/image-generation/SKILL.md @@ -15,6 +15,7 @@ If the `generate_image` tool is not available in the current tool list, tell the - Image editing: pass the saved artifact path or user image path in `reference_images`. - Iterative edits in the same conversation: prefer the most recent generated image artifact if the user says things like "make it brighter", "change the background", or "try another version". - Ambiguous edits: ask a short clarifying question if multiple recent images could be the target. +- In the current chat, do not call `message` just to announce or resend generated images. The runtime attaches images from `generate_image` to the final assistant reply automatically. ## Prompt Rules @@ -39,9 +40,11 @@ In normal user-facing replies, do not expose local filesystem paths. Keep the re For follow-up edits, pass the prior artifact `path` to `reference_images`. If the user provides a new uploaded image, use that path as the reference instead. +Do not include internal replay markers such as `[Message Time: ...]`, `[image: /local/path]`, `generate_image(...)`, or `message(...)` in user-facing replies. + ## Provider Notes -Do not ask users to paste API keys into chat. If configuration is needed, describe the fields and remind them to restart the gateway after changing config. +Do not ask users to paste API keys into chat. If configuration is needed, describe the fields; LLM provider and BYOK changes are hot-reloaded for new turns. For OpenRouter, the image tool expects: diff --git a/nanobot/templates/agent/identity.md b/nanobot/templates/agent/identity.md index a53be7094..6602f7fe9 100644 --- a/nanobot/templates/agent/identity.md +++ b/nanobot/templates/agent/identity.md @@ -28,5 +28,7 @@ Output is rendered in a terminal. Avoid markdown headings and tables. Use plain - On broad searches, use `grep(output_mode="count")` to scope before requesting full content. {% include 'agent/_snippets/untrusted_content.md' %} -Reply directly with text for conversations. Only use the 'message' tool to send to a specific chat channel. -IMPORTANT: To send files (images, video, audio, documents) to the user, you MUST call the 'message' tool with the 'media' parameter. Do NOT use read_file to "send" a file — reading a file only shows its content to you, it does NOT deliver the file to the user. Examples: message(content="Here is the image", media=["/path/to/file.png"]) or message(content="Here is the video", media=["/path/to/video.mp4"]) +Reply directly with text for the current conversation. Do not use the 'message' tool for normal replies in the current chat. +When you need to call tools before answering, do not include the final user-visible answer in the same assistant message as the tool calls. Wait for the tool results, then answer once. +Use the 'message' tool only for proactive sends, cross-channel delivery, or explicitly sending existing local files as attachments. When a tool such as 'generate_image' creates user-visible media, the runtime attaches those artifacts to the final assistant reply automatically, so do not call 'message' just to announce or resend them. +To send an existing local file that was not automatically attached by another tool, call 'message' with the 'media' parameter. Do NOT use read_file to "send" a file — reading a file only shows its content to you, it does NOT deliver the file to the user. Example: message(content="Here is the document", channel="telegram", chat_id="...", media=["/path/to/file.pdf"]) diff --git a/nanobot/utils/artifacts.py b/nanobot/utils/artifacts.py index 34802a7ac..eca706eed 100644 --- a/nanobot/utils/artifacts.py +++ b/nanobot/utils/artifacts.py @@ -115,7 +115,8 @@ def generated_image_tool_result(artifacts: list[dict[str, Any]]) -> str: "artifacts": artifacts, "next_step": ( "Use these artifact paths as reference_images for follow-up edits. " - "Mention the image id/path to the user; do not paste base64." + "For the current chat, reply naturally; the runtime attaches generated images automatically. " + "Do not call message just to announce or resend them. Keep raw paths internal unless the user asks for debug details." ), }, ensure_ascii=False, diff --git a/tests/agent/test_context_prompt_cache.py b/tests/agent/test_context_prompt_cache.py index bf06b8b2d..6e69dc85b 100644 --- a/tests/agent/test_context_prompt_cache.py +++ b/tests/agent/test_context_prompt_cache.py @@ -289,6 +289,18 @@ def test_build_messages_passes_channel_to_system_prompt(tmp_path) -> None: assert "messaging app" in system +def test_system_prompt_keeps_message_tool_out_of_current_chat_replies(tmp_path) -> None: + workspace = _make_workspace(tmp_path) + builder = ContextBuilder(workspace) + + prompt = builder.build_system_prompt(channel="slack") + + assert "Do not use the 'message' tool for normal replies in the current chat" in prompt + assert "the runtime attaches those artifacts to the final assistant reply automatically" in prompt + assert "do not call 'message' just to announce or resend them" in prompt + assert "Wait for the tool results, then answer once" in prompt + + def test_subagent_result_does_not_create_consecutive_assistant_messages(tmp_path) -> None: workspace = _make_workspace(tmp_path) builder = ContextBuilder(workspace) diff --git a/tests/agent/test_session_manager_history.py b/tests/agent/test_session_manager_history.py index 75bc7713d..9fb77fafd 100644 --- a/tests/agent/test_session_manager_history.py +++ b/tests/agent/test_session_manager_history.py @@ -245,14 +245,8 @@ def test_get_history_annotates_user_turns_but_not_assistant_turns(): ] -def test_get_history_annotates_proactive_assistant_deliveries_with_timestamps(): - """Cron / heartbeat assistant pushes still carry a timestamp prefix. - - These proactive deliveries can sit hours away from the next user reply, - so the model needs to know when they fired. They are rare enough that - they don't act as in-context demonstrations encouraging the model to - prefix its own normal replies with ``[Message Time: ...]``. - """ +def test_get_history_does_not_annotate_proactive_assistant_deliveries_with_timestamps(): + """Assistant-side timestamp examples can leak back into future replies.""" session = Session(key="test:proactive-timestamps") session.messages.append({ "role": "assistant", @@ -271,7 +265,7 @@ def test_get_history_annotates_proactive_assistant_deliveries_with_timestamps(): assert history == [ { "role": "assistant", - "content": "[Message Time: 2026-04-26T15:00:00]\n记得喝水", + "content": "记得喝水", }, { "role": "user", @@ -370,6 +364,41 @@ def test_get_history_ignores_media_kwarg_on_non_user_rows(): assert history[0]["content"] == [{"type": "text", "text": "structured"}] +def test_get_history_does_not_paste_assistant_media_paths_into_replay(): + session = Session(key="test:assistant-media") + session.messages.append( + { + "role": "assistant", + "content": "来了 🎨", + "media": ["/home/user/.nanobot/media/generated/img_abc.png"], + } + ) + + history = session.get_history(max_messages=500) + + assert history == [{"role": "assistant", "content": "来了 🎨"}] + + +def test_get_history_sanitizes_existing_assistant_replay_artifacts(): + session = Session(key="test:polluted-assistant") + session.messages.append( + { + "role": "assistant", + "content": ( + "[Message Time: 2026-05-09 00:33:48]\n" + "来了 🎨\n" + "[image: /home/user/.nanobot/media/generated/img_old.png]\n\n" + "generate_image(\"16:9\")\n" + "message(\"来了 🎨\")" + ), + } + ) + + history = session.get_history(max_messages=500, include_timestamps=True) + + assert history == [{"role": "assistant", "content": "来了 🎨"}] + + def test_get_history_respects_max_tokens(monkeypatch): session = Session(key="test:token-cap") session.messages.extend( diff --git a/tests/tools/test_message_tool_suppress.py b/tests/tools/test_message_tool_suppress.py index 213a8be63..88af40752 100644 --- a/tests/tools/test_message_tool_suppress.py +++ b/tests/tools/test_message_tool_suppress.py @@ -166,3 +166,13 @@ class TestMessageToolTurnTracking: tool._sent_in_turn = True tool.start_turn() assert not tool._sent_in_turn + + def test_schema_discourages_current_chat_replies(self) -> None: + tool = MessageTool() + + assert "Do not use this for the normal reply in the current chat" in tool.description + assert "generate_image creates images in the current chat" in tool.description + assert ( + "Do not use this for a normal reply in the current chat" + in tool.parameters["properties"]["content"]["description"] + ) diff --git a/tests/utils/test_artifacts.py b/tests/utils/test_artifacts.py index 128e394f5..64d2e3f32 100644 --- a/tests/utils/test_artifacts.py +++ b/tests/utils/test_artifacts.py @@ -75,6 +75,7 @@ def test_generated_image_paths_from_tool_results() -> None: {"id": "img_2", "path": "/tmp/two.png"}, ] ) + payload = json.loads(result) assert generated_image_paths_from_messages( [ @@ -82,3 +83,5 @@ def test_generated_image_paths_from_tool_results() -> None: {"role": "tool", "name": "other", "content": result}, ] ) == ["/tmp/one.png", "/tmp/two.png"] + assert "runtime attaches generated images automatically" in payload["next_step"] + assert "Do not call message" in payload["next_step"]