perf(agent): append runtime context after user content for cache stability

Runtime context (time, channel, sender) changes every turn, so placing
it before user content invalidated the prompt-cache prefix. Appending it
after user content keeps the prefix stable and improves KV cache hit
rates. The stripping logic in _save_turn was simplified from 16 lines
to 6 as a side benefit.
This commit is contained in:
chengyongru 2026-05-15 17:51:36 +08:00 committed by Xubin Ren
parent 164614ccf2
commit 0f3677c0d8
4 changed files with 68 additions and 24 deletions

View File

@ -93,7 +93,7 @@ class ContextBuilder:
channel: str | None, chat_id: str | None, timezone: str | None = None,
sender_id: str | None = None,
) -> str:
"""Build untrusted runtime metadata block for injection before the user message."""
"""Build untrusted runtime metadata block appended after user content."""
lines = [f"Current Time: {current_time_str(timezone)}"]
if channel and chat_id:
lines += [f"Channel: {channel}", f"Chat ID: {chat_id}"]
@ -154,10 +154,12 @@ class ContextBuilder:
# Merge runtime context and user content into a single user message
# to avoid consecutive same-role messages that some providers reject.
# Runtime context is appended to keep the user-content prefix stable
# for prompt-cache hits (the context changes every turn due to time).
if isinstance(user_content, str):
merged = f"{runtime_ctx}\n\n{user_content}"
merged = f"{user_content}\n\n{runtime_ctx}"
else:
merged = [{"type": "text", "text": runtime_ctx}] + user_content
merged = user_content + [{"type": "text", "text": runtime_ctx}]
messages = [
{"role": "system", "content": self.build_system_prompt(skill_names, channel=channel, session_summary=session_summary)},
*history,

View File

@ -720,9 +720,9 @@ class AgentLoop:
self.context.timezone,
)
if isinstance(user_content, str):
merged: str | list[dict[str, Any]] = f"{runtime_ctx}\n\n{user_content}"
merged: str | list[dict[str, Any]] = f"{user_content}\n\n{runtime_ctx}"
else:
merged = [{"type": "text", "text": runtime_ctx}] + user_content
merged = user_content + [{"type": "text", "text": runtime_ctx}]
return {"role": "user", "content": merged}
items: list[dict[str, Any]] = []
@ -1443,24 +1443,14 @@ class AgentLoop:
continue
entry["content"] = filtered
elif role == "user":
if isinstance(content, str) and content.startswith(ContextBuilder._RUNTIME_CONTEXT_TAG):
# Strip the entire runtime-context block (including any session summary).
# The block is bounded by _RUNTIME_CONTEXT_TAG and _RUNTIME_CONTEXT_END.
end_marker = ContextBuilder._RUNTIME_CONTEXT_END
end_pos = content.find(end_marker)
if end_pos >= 0:
after = content[end_pos + len(end_marker):].lstrip("\n")
if after:
entry["content"] = after
else:
continue
if isinstance(content, str) and ContextBuilder._RUNTIME_CONTEXT_TAG in content:
# Strip the runtime-context block appended at the end.
tag_pos = content.find(ContextBuilder._RUNTIME_CONTEXT_TAG)
before = content[:tag_pos].rstrip("\n ")
if before:
entry["content"] = before
else:
# Fallback: no end marker found, strip the tag prefix
after_tag = content[len(ContextBuilder._RUNTIME_CONTEXT_TAG):].lstrip("\n")
if after_tag.strip():
entry["content"] = after_tag
else:
continue
continue
if isinstance(content, list):
filtered = self._sanitize_persisted_blocks(content, drop_runtime=True)
if not filtered:

View File

@ -87,6 +87,24 @@ def test_runtime_context_is_separate_untrusted_user_message(tmp_path) -> None:
assert "Return exactly: OK" in user_content
def test_runtime_context_appended_after_user_content(tmp_path) -> None:
    """User content must precede runtime context for prompt-cache prefix stability."""
    workspace = _make_workspace(tmp_path)
    builder = ContextBuilder(workspace)
    messages = builder.build_messages(
        history=[],
        current_message="hello world",
        channel="cli",
        chat_id="direct",
    )
    content = messages[-1]["content"]
    user_pos = content.find("hello world")
    tag_pos = content.find(ContextBuilder._RUNTIME_CONTEXT_TAG)
    # str.find returns -1 on a miss, so a bare `user_pos < tag_pos` would pass
    # vacuously if the user text were dropped; require both to be present first.
    assert user_pos >= 0, "user content missing from merged message"
    assert tag_pos >= 0, "runtime context tag missing from merged message"
    assert user_pos < tag_pos, "user content must precede runtime context for prefix stability"
def test_runtime_context_includes_sender_id_when_provided(tmp_path) -> None:
"""Sender ID should be included in runtime context when provided."""
workspace = _make_workspace(tmp_path)

View File

@ -101,8 +101,8 @@ def test_save_turn_keeps_image_placeholder_with_path_after_runtime_strip() -> No
[{
"role": "user",
"content": [
{"type": "text", "text": runtime},
{"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}, "_meta": {"path": "/media/feishu/photo.jpg"}},
{"type": "text", "text": runtime},
],
}],
skip=0,
@ -120,8 +120,8 @@ def test_save_turn_keeps_image_placeholder_without_meta() -> None:
[{
"role": "user",
"content": [
{"type": "text", "text": runtime},
{"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}},
{"type": "text", "text": runtime},
],
}],
skip=0,
@ -129,6 +129,40 @@ def test_save_turn_keeps_image_placeholder_without_meta() -> None:
assert session.messages[0]["content"] == [{"type": "text", "text": "[image]"}]
def test_save_turn_strips_runtime_context_suffix_from_string() -> None:
    """A runtime-context block trailing a string user message is removed on save."""
    agent_loop = _mk_loop()
    sess = Session(key="test:suffix-strip")
    # Tag, one metadata line, end marker — joined with newlines.
    runtime_block = "\n".join(
        [
            ContextBuilder._RUNTIME_CONTEXT_TAG,
            "Current Time: now",
            ContextBuilder._RUNTIME_CONTEXT_END,
        ]
    )
    user_message = {"role": "user", "content": "hello world\n\n" + runtime_block}
    agent_loop._save_turn(sess, [user_message], skip=0)
    saved = sess.messages[0]
    assert saved["content"] == "hello world"
def test_save_turn_skips_string_user_when_only_runtime_context_suffix() -> None:
    """A string user message made up solely of the runtime-context block is not saved."""
    agent_loop = _mk_loop()
    sess = Session(key="test:suffix-only")
    # Tag, one metadata line, end marker — joined with newlines.
    runtime_only = "\n".join(
        [
            ContextBuilder._RUNTIME_CONTEXT_TAG,
            "Current Time: now",
            ContextBuilder._RUNTIME_CONTEXT_END,
        ]
    )
    turn = [{"role": "user", "content": runtime_only}]
    agent_loop._save_turn(sess, turn, skip=0)
    assert sess.messages == []
def test_save_turn_keeps_tool_results_under_16k() -> None:
loop = _mk_loop()
session = Session(key="test:tool-result")