mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-19 16:12:30 +00:00
fix(image): prevent duplicate delivery and replay artifacts
This commit is contained in:
parent
4d168c571c
commit
3231aaf9ee
@ -13,12 +13,24 @@ from nanobot.config.paths import get_workspace_path
|
||||
|
||||
@tool_parameters(
|
||||
tool_parameters_schema(
|
||||
content=StringSchema("The message content to send"),
|
||||
channel=StringSchema("Optional: target channel (telegram, discord, etc.)"),
|
||||
chat_id=StringSchema("Optional: target chat/user ID"),
|
||||
content=StringSchema(
|
||||
"Message content for proactive or cross-channel delivery. "
|
||||
"Do not use this for a normal reply in the current chat."
|
||||
),
|
||||
channel=StringSchema(
|
||||
"Optional target channel for cross-channel/proactive delivery. "
|
||||
"Do not set this to the current runtime channel for a normal reply."
|
||||
),
|
||||
chat_id=StringSchema(
|
||||
"Optional target chat/user ID for cross-channel/proactive delivery. "
|
||||
"Do not set this to the current runtime chat for a normal reply."
|
||||
),
|
||||
media=ArraySchema(
|
||||
StringSchema(""),
|
||||
description="Optional: list of file paths to attach (images, video, audio, documents)",
|
||||
description=(
|
||||
"Optional list of existing file paths to attach for proactive or cross-channel delivery. "
|
||||
"Do not use this to resend generate_image outputs in the current chat."
|
||||
),
|
||||
),
|
||||
buttons=ArraySchema(
|
||||
ArraySchema(StringSchema("Button label")),
|
||||
@ -100,9 +112,14 @@ class MessageTool(Tool):
|
||||
@property
|
||||
def description(self) -> str:
|
||||
return (
|
||||
"Send a message to the user, optionally with file attachments. "
|
||||
"This is the ONLY way to deliver files (images, documents, audio, video) to the user. "
|
||||
"Use the 'media' parameter with file paths to attach files. "
|
||||
"Proactively send a message to a user/channel, optionally with file attachments. "
|
||||
"Use this for reminders, cross-channel delivery, or explicit proactive sends. "
|
||||
"Do not use this for the normal reply in the current chat: answer naturally instead. "
|
||||
"If channel/chat_id would target the current runtime conversation, do not call this tool "
|
||||
"unless the user explicitly asked you to proactively send an existing file attachment. "
|
||||
"When generate_image creates images in the current chat, the final assistant reply "
|
||||
"automatically attaches them; do not call message just to announce or resend them. "
|
||||
"For proactive attachment delivery, use the 'media' parameter with file paths. "
|
||||
"Do NOT use read_file to send files — that only reads content for your own analysis."
|
||||
)
|
||||
|
||||
|
||||
@ -2,6 +2,7 @@
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
from contextlib import suppress
|
||||
from dataclasses import dataclass, field
|
||||
@ -21,6 +22,25 @@ from nanobot.utils.helpers import (
|
||||
)
|
||||
|
||||
FILE_MAX_MESSAGES = 2000
|
||||
_MESSAGE_TIME_PREFIX_RE = re.compile(r"^\[Message Time: [^\]]+\]\n?")
|
||||
_LOCAL_IMAGE_BREADCRUMB_RE = re.compile(r"^\[image: (?:/|~)[^\]]+\]\s*$")
|
||||
_TOOL_CALL_ECHO_RE = re.compile(r'^\s*(?:generate_image|message)\([^)]*\)\s*$')
|
||||
|
||||
|
||||
def _sanitize_assistant_replay_text(content: str) -> str:
|
||||
"""Remove internal replay artifacts that the model may have copied before.
|
||||
|
||||
These strings are useful as runtime/session metadata, but when they appear
|
||||
in assistant examples they become demonstrations for the model to repeat.
|
||||
"""
|
||||
content = _MESSAGE_TIME_PREFIX_RE.sub("", content, count=1)
|
||||
lines = [
|
||||
line
|
||||
for line in content.splitlines()
|
||||
if not _LOCAL_IMAGE_BREADCRUMB_RE.match(line)
|
||||
and not _TOOL_CALL_ECHO_RE.match(line)
|
||||
]
|
||||
return "\n".join(lines).strip()
|
||||
|
||||
|
||||
@dataclass
|
||||
@ -41,22 +61,15 @@ class Session:
|
||||
Annotating *every* assistant turn trains the model (via in-context
|
||||
demonstrations) to start its own replies with the same
|
||||
``[Message Time: ...]`` prefix, which leaks metadata back to the user.
|
||||
We therefore only annotate:
|
||||
|
||||
* ``user`` turns — needed so the model can pin the conversation in time.
|
||||
* proactive deliveries (``_channel_delivery=True``) — cron / heartbeat
|
||||
assistant pushes that may sit hours away from the next user reply,
|
||||
and are too infrequent to act as parroting demonstrations.
|
||||
We therefore only annotate user turns. User-side stamps are enough to
|
||||
pin adjacent assistant replies for relative-time reasoning, including
|
||||
proactive messages the user replies to later.
|
||||
"""
|
||||
timestamp = message.get("timestamp")
|
||||
if not timestamp or not isinstance(content, str):
|
||||
return content
|
||||
role = message.get("role")
|
||||
if role == "user":
|
||||
pass
|
||||
elif role == "assistant" and message.get("_channel_delivery"):
|
||||
pass
|
||||
else:
|
||||
if role != "user":
|
||||
return content
|
||||
return f"[Message Time: {timestamp}]\n{content}"
|
||||
|
||||
@ -105,19 +118,25 @@ class Session:
|
||||
out: list[dict[str, Any]] = []
|
||||
for message in sliced:
|
||||
content = message.get("content", "")
|
||||
role = message.get("role")
|
||||
if role == "assistant" and isinstance(content, str):
|
||||
content = _sanitize_assistant_replay_text(content)
|
||||
# Synthesize an ``[image: path]`` breadcrumb from the persisted
|
||||
# ``media`` kwarg so LLM replay still sees *something* where the
|
||||
# image used to be. Without this, an image-only user turn
|
||||
# replays as an empty user message — the assistant's reply then
|
||||
# looks like it's responding to nothing.
|
||||
media = message.get("media")
|
||||
if isinstance(media, list) and media and isinstance(content, str):
|
||||
if role == "user" and isinstance(media, list) and media and isinstance(content, str):
|
||||
breadcrumbs = "\n".join(
|
||||
image_placeholder_text(p) for p in media if isinstance(p, str) and p
|
||||
)
|
||||
content = f"{content}\n{breadcrumbs}" if content else breadcrumbs
|
||||
if include_timestamps:
|
||||
content = self._annotate_message_time(message, content)
|
||||
if role == "assistant" and isinstance(content, str) and not content.strip():
|
||||
if not any(key in message for key in ("tool_calls", "reasoning_content", "thinking_blocks")):
|
||||
continue
|
||||
entry: dict[str, Any] = {"role": message["role"], "content": content}
|
||||
for key in ("tool_calls", "tool_call_id", "name", "reasoning_content", "thinking_blocks"):
|
||||
if key in message:
|
||||
|
||||
@ -15,6 +15,7 @@ If the `generate_image` tool is not available in the current tool list, tell the
|
||||
- Image editing: pass the saved artifact path or user image path in `reference_images`.
|
||||
- Iterative edits in the same conversation: prefer the most recent generated image artifact if the user says things like "make it brighter", "change the background", or "try another version".
|
||||
- Ambiguous edits: ask a short clarifying question if multiple recent images could be the target.
|
||||
- In the current chat, do not call `message` just to announce or resend generated images. The runtime attaches images from `generate_image` to the final assistant reply automatically.
|
||||
|
||||
## Prompt Rules
|
||||
|
||||
@ -39,9 +40,11 @@ In normal user-facing replies, do not expose local filesystem paths. Keep the re
|
||||
|
||||
For follow-up edits, pass the prior artifact `path` to `reference_images`. If the user provides a new uploaded image, use that path as the reference instead.
|
||||
|
||||
Do not include internal replay markers such as `[Message Time: ...]`, `[image: /local/path]`, `generate_image(...)`, or `message(...)` in user-facing replies.
|
||||
|
||||
## Provider Notes
|
||||
|
||||
Do not ask users to paste API keys into chat. If configuration is needed, describe the fields and remind them to restart the gateway after changing config.
|
||||
Do not ask users to paste API keys into chat. If configuration is needed, describe the fields; LLM provider and BYOK changes are hot-reloaded for new turns.
|
||||
|
||||
For OpenRouter, the image tool expects:
|
||||
|
||||
|
||||
@ -28,5 +28,7 @@ Output is rendered in a terminal. Avoid markdown headings and tables. Use plain
|
||||
- On broad searches, use `grep(output_mode="count")` to scope before requesting full content.
|
||||
{% include 'agent/_snippets/untrusted_content.md' %}
|
||||
|
||||
Reply directly with text for conversations. Only use the 'message' tool to send to a specific chat channel.
|
||||
IMPORTANT: To send files (images, video, audio, documents) to the user, you MUST call the 'message' tool with the 'media' parameter. Do NOT use read_file to "send" a file — reading a file only shows its content to you, it does NOT deliver the file to the user. Examples: message(content="Here is the image", media=["/path/to/file.png"]) or message(content="Here is the video", media=["/path/to/video.mp4"])
|
||||
Reply directly with text for the current conversation. Do not use the 'message' tool for normal replies in the current chat.
|
||||
When you need to call tools before answering, do not include the final user-visible answer in the same assistant message as the tool calls. Wait for the tool results, then answer once.
|
||||
Use the 'message' tool only for proactive sends, cross-channel delivery, or explicitly sending existing local files as attachments. When a tool such as 'generate_image' creates user-visible media, the runtime attaches those artifacts to the final assistant reply automatically, so do not call 'message' just to announce or resend them.
|
||||
To send an existing local file that was not automatically attached by another tool, call 'message' with the 'media' parameter. Do NOT use read_file to "send" a file — reading a file only shows its content to you, it does NOT deliver the file to the user. Example: message(content="Here is the document", channel="telegram", chat_id="...", media=["/path/to/file.pdf"])
|
||||
|
||||
@ -115,7 +115,8 @@ def generated_image_tool_result(artifacts: list[dict[str, Any]]) -> str:
|
||||
"artifacts": artifacts,
|
||||
"next_step": (
|
||||
"Use these artifact paths as reference_images for follow-up edits. "
|
||||
"Mention the image id/path to the user; do not paste base64."
|
||||
"For the current chat, reply naturally; the runtime attaches generated images automatically. "
|
||||
"Do not call message just to announce or resend them. Keep raw paths internal unless the user asks for debug details."
|
||||
),
|
||||
},
|
||||
ensure_ascii=False,
|
||||
|
||||
@ -289,6 +289,18 @@ def test_build_messages_passes_channel_to_system_prompt(tmp_path) -> None:
|
||||
assert "messaging app" in system
|
||||
|
||||
|
||||
def test_system_prompt_keeps_message_tool_out_of_current_chat_replies(tmp_path) -> None:
|
||||
workspace = _make_workspace(tmp_path)
|
||||
builder = ContextBuilder(workspace)
|
||||
|
||||
prompt = builder.build_system_prompt(channel="slack")
|
||||
|
||||
assert "Do not use the 'message' tool for normal replies in the current chat" in prompt
|
||||
assert "the runtime attaches those artifacts to the final assistant reply automatically" in prompt
|
||||
assert "do not call 'message' just to announce or resend them" in prompt
|
||||
assert "Wait for the tool results, then answer once" in prompt
|
||||
|
||||
|
||||
def test_subagent_result_does_not_create_consecutive_assistant_messages(tmp_path) -> None:
|
||||
workspace = _make_workspace(tmp_path)
|
||||
builder = ContextBuilder(workspace)
|
||||
|
||||
@ -245,14 +245,8 @@ def test_get_history_annotates_user_turns_but_not_assistant_turns():
|
||||
]
|
||||
|
||||
|
||||
def test_get_history_annotates_proactive_assistant_deliveries_with_timestamps():
|
||||
"""Cron / heartbeat assistant pushes still carry a timestamp prefix.
|
||||
|
||||
These proactive deliveries can sit hours away from the next user reply,
|
||||
so the model needs to know when they fired. They are rare enough that
|
||||
they don't act as in-context demonstrations encouraging the model to
|
||||
prefix its own normal replies with ``[Message Time: ...]``.
|
||||
"""
|
||||
def test_get_history_does_not_annotate_proactive_assistant_deliveries_with_timestamps():
|
||||
"""Assistant-side timestamp examples can leak back into future replies."""
|
||||
session = Session(key="test:proactive-timestamps")
|
||||
session.messages.append({
|
||||
"role": "assistant",
|
||||
@ -271,7 +265,7 @@ def test_get_history_annotates_proactive_assistant_deliveries_with_timestamps():
|
||||
assert history == [
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "[Message Time: 2026-04-26T15:00:00]\n记得喝水",
|
||||
"content": "记得喝水",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
@ -370,6 +364,41 @@ def test_get_history_ignores_media_kwarg_on_non_user_rows():
|
||||
assert history[0]["content"] == [{"type": "text", "text": "structured"}]
|
||||
|
||||
|
||||
def test_get_history_does_not_paste_assistant_media_paths_into_replay():
|
||||
session = Session(key="test:assistant-media")
|
||||
session.messages.append(
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "来了 🎨",
|
||||
"media": ["/home/user/.nanobot/media/generated/img_abc.png"],
|
||||
}
|
||||
)
|
||||
|
||||
history = session.get_history(max_messages=500)
|
||||
|
||||
assert history == [{"role": "assistant", "content": "来了 🎨"}]
|
||||
|
||||
|
||||
def test_get_history_sanitizes_existing_assistant_replay_artifacts():
|
||||
session = Session(key="test:polluted-assistant")
|
||||
session.messages.append(
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": (
|
||||
"[Message Time: 2026-05-09 00:33:48]\n"
|
||||
"来了 🎨\n"
|
||||
"[image: /home/user/.nanobot/media/generated/img_old.png]\n\n"
|
||||
"generate_image(\"16:9\")\n"
|
||||
"message(\"来了 🎨\")"
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
history = session.get_history(max_messages=500, include_timestamps=True)
|
||||
|
||||
assert history == [{"role": "assistant", "content": "来了 🎨"}]
|
||||
|
||||
|
||||
def test_get_history_respects_max_tokens(monkeypatch):
|
||||
session = Session(key="test:token-cap")
|
||||
session.messages.extend(
|
||||
|
||||
@ -166,3 +166,13 @@ class TestMessageToolTurnTracking:
|
||||
tool._sent_in_turn = True
|
||||
tool.start_turn()
|
||||
assert not tool._sent_in_turn
|
||||
|
||||
def test_schema_discourages_current_chat_replies(self) -> None:
|
||||
tool = MessageTool()
|
||||
|
||||
assert "Do not use this for the normal reply in the current chat" in tool.description
|
||||
assert "generate_image creates images in the current chat" in tool.description
|
||||
assert (
|
||||
"Do not use this for a normal reply in the current chat"
|
||||
in tool.parameters["properties"]["content"]["description"]
|
||||
)
|
||||
|
||||
@ -75,6 +75,7 @@ def test_generated_image_paths_from_tool_results() -> None:
|
||||
{"id": "img_2", "path": "/tmp/two.png"},
|
||||
]
|
||||
)
|
||||
payload = json.loads(result)
|
||||
|
||||
assert generated_image_paths_from_messages(
|
||||
[
|
||||
@ -82,3 +83,5 @@ def test_generated_image_paths_from_tool_results() -> None:
|
||||
{"role": "tool", "name": "other", "content": result},
|
||||
]
|
||||
) == ["/tmp/one.png", "/tmp/two.png"]
|
||||
assert "runtime attaches generated images automatically" in payload["next_step"]
|
||||
assert "Do not call message" in payload["next_step"]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user