fix(session): stop training the model to parrot [Message Time: ...]

Past assistant turns in history were prefixed with "[Message Time: ...]"
just like user turns. The model treated these as in-context demos and
started prefixing its own replies with the same marker, leaking
metadata to the user. Prompt-level warnings could not beat dozens of
prior assistant samples.

Annotate only user turns and proactive deliveries
(_channel_delivery=True, i.e. cron / heartbeat pushes whose timing is
the whole point and which are too infrequent to act as demos). Adjacent
user-side timestamps still pin every normal assistant reply for
relative-time reasoning. The now-redundant identity.md warning is
removed along with the demonstration source.
This commit is contained in:
Xubin Ren 2026-04-27 07:11:20 +00:00
parent 620d9e4f31
commit 311a7fe36e
5 changed files with 72 additions and 16 deletions

View File

@ -32,13 +32,27 @@ class Session:
@staticmethod
def _annotate_message_time(message: dict[str, Any], content: Any) -> Any:
    """Prefix *content* with the turn's persisted timestamp when eligible.

    Exposes turn timestamps to the model for relative-date reasoning.
    Annotating *every* assistant turn trains the model (via in-context
    demonstrations) to start its own replies with the same
    ``[Message Time: ...]`` prefix, which leaks metadata back to the user.
    Only two kinds of turn are therefore annotated:

    * ``user`` turns -- needed so the model can pin the conversation in time.
    * proactive deliveries (truthy ``_channel_delivery``) -- cron / heartbeat
      assistant pushes that may sit hours away from the next user reply,
      and are too infrequent to act as parroting demonstrations.

    Args:
        message: Persisted message; its ``timestamp``, ``role`` and
            ``_channel_delivery`` keys are read.
        content: The message content about to be handed to the model.

    Returns:
        ``content`` prefixed with ``[Message Time: <timestamp>]`` and a
        newline, or ``content`` unchanged when the timestamp is missing or
        empty, the content is not a string, or the turn is not eligible.
    """
    timestamp = message.get("timestamp")
    # Non-string content (e.g. structured content parts) is passed through
    # untouched: there is no plain text to prepend the marker to.
    if not timestamp or not isinstance(content, str):
        return content

    role = message.get("role")
    is_proactive_push = role == "assistant" and bool(message.get("_channel_delivery"))
    # Ordinary assistant replies and every other role stay unannotated so the
    # history never demonstrates a reply that starts with the marker.
    if role != "user" and not is_proactive_push:
        return content

    return f"[Message Time: {timestamp}]\n{content}"

View File

@ -28,9 +28,5 @@ Output is rendered in a terminal. Avoid markdown headings and tables. Use plain
- On broad searches, use `grep(output_mode="count")` to scope before requesting full content.
{% include 'agent/_snippets/untrusted_content.md' %}
Historical messages may include `[Message Time: ...]` prefixes. Treat them as
metadata for chronology only; never quote, copy, or include those markers in
your response.
Reply directly with text for conversations. Only use the 'message' tool to send to a specific chat channel.
IMPORTANT: To send files (images, video, audio, documents) to the user, you MUST call the 'message' tool with the 'media' parameter. Do NOT use read_file to "send" a file — reading a file only shows its content to you, it does NOT deliver the file to the user. Examples: message(content="Here is the image", media=["/path/to/file.png"]) or message(content="Here is the video", media=["/path/to/video.mp4"])

View File

@ -188,14 +188,15 @@ def test_identity_has_no_behavioral_instructions(tmp_path) -> None:
assert "Execution Rules" not in identity
def test_system_prompt_does_not_warn_about_message_time_markers(tmp_path) -> None:
    """The system prompt carries no ``[Message Time: ...]`` warning.

    Parroting is prevented by not annotating assistant turns in history, so
    no prompt-level instruction about the marker is needed.
    """
    workspace = _make_workspace(tmp_path)
    builder = ContextBuilder(workspace)

    prompt = builder.build_system_prompt()

    # Any mention of the marker in the prompt would itself be a leak source.
    assert "Message Time" not in prompt
def test_default_soul_template_contains_execution_rules() -> None:

View File

@ -537,8 +537,12 @@ async def test_system_subagent_followup_is_persisted_before_prompt_assembly(tmp_
non_system = [m for m in seen["initial_messages"] if m.get("role") != "system"]
assert "question" in non_system[0]["content"]
assert "working" in non_system[1]["content"]
# User turns carry the timestamp prefix so the model can reason about
# relative time. Assistant turns do NOT, otherwise the model treats those
# past replies as in-context examples and starts its own outputs with
# ``[Message Time: ...]`` (which then leaks back to the user).
assert "[Message Time:" in non_system[0]["content"]
assert "[Message Time:" in non_system[1]["content"]
assert "[Message Time:" not in non_system[1]["content"]
assert non_system[2]["content"].count("subagent result") == 1
assert "Current Time:" in non_system[2]["content"]

View File

@ -194,7 +194,13 @@ def test_get_history_preserves_reasoning_content():
]
def test_get_history_exposes_turn_timestamps_to_model():
def test_get_history_annotates_user_turns_but_not_assistant_turns():
"""Only user turns carry the timestamp prefix.
Annotating assistant turns trains the model (via in-context examples) to
start its own replies with ``[Message Time: ...]``. User-side stamps are
enough to pin adjacent assistant replies for relative-time reasoning.
"""
session = Session(key="test:timestamps")
session.messages.append({
"role": "user",
@ -216,7 +222,42 @@ def test_get_history_exposes_turn_timestamps_to_model():
},
{
"role": "assistant",
"content": "[Message Time: 2026-04-26T22:00:05]\n记下来了",
"content": "记下来了",
},
]
def test_get_history_annotates_proactive_assistant_deliveries_with_timestamps():
    """Proactive assistant pushes keep their ``[Message Time: ...]`` prefix.

    A cron / heartbeat delivery can sit hours before the next user reply, so
    the model has to be told when it fired. Such pushes are rare enough that
    they do not serve as in-context demonstrations nudging the model to
    prefix its own normal replies with ``[Message Time: ...]``.
    """
    proactive_push = {
        "role": "assistant",
        "content": "记得喝水",
        "timestamp": "2026-04-26T15:00:00",
        "_channel_delivery": True,
    }
    # Empty user content still receives the prefix (followed by a bare newline).
    later_user_turn = {
        "role": "user",
        "content": "",
        "timestamp": "2026-04-26T18:00:00",
    }
    session = Session(key="test:proactive-timestamps")
    for turn in (proactive_push, later_user_turn):
        session.messages.append(turn)

    history = session.get_history(max_messages=500, include_timestamps=True)

    expected = [
        {
            "role": "assistant",
            "content": "[Message Time: 2026-04-26T15:00:00]\n记得喝水",
        },
        {
            "role": "user",
            "content": "[Message Time: 2026-04-26T18:00:00]\n",
        },
    ]
    assert history == expected