mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-12 20:56:01 +00:00
refactor(session): internalize history/file-cap knobs as constants
Move sessionHistoryMaxMessages, sessionHistoryMaxTokens, and sessionFileMaxMessages out of user-facing config into internal constants (HISTORY_MAX_MESSAGES=120, FILE_MAX_MESSAGES=2000).

- Remove 3 fields from AgentDefaults and config pipeline
- Sink enforce_file_cap into Session (was AgentLoop)
- Auto-derive token budget from context window (was configurable)
- Net -113 lines across 7 files; 723 tests green

Made-with: Cursor
This commit is contained in:
parent
29ebc2d355
commit
eb4b3d9e26
@ -717,10 +717,7 @@ When a user is idle for longer than a configured threshold, nanobot **proactivel
|
||||
{
|
||||
"agents": {
|
||||
"defaults": {
|
||||
"idleCompactAfterMinutes": 15,
|
||||
"sessionHistoryMaxMessages": 120,
|
||||
"sessionHistoryMaxTokens": 0,
|
||||
"sessionFileMaxMessages": 2000
|
||||
"idleCompactAfterMinutes": 15
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -729,9 +726,6 @@ When a user is idle for longer than a configured threshold, nanobot **proactivel
|
||||
| Option | Default | Description |
|
||||
|--------|---------|-------------|
|
||||
| `agents.defaults.idleCompactAfterMinutes` | `0` (disabled) | Minutes of idle time before auto-compaction starts. Set to `0` to disable. Recommended: `15` — close to a typical LLM KV cache expiry window, so stale sessions get compacted before the user returns. |
|
||||
| `agents.defaults.sessionHistoryMaxMessages` | `120` | Per-turn max number of session messages included in prompt replay. Set to `0` for unlimited history. |
|
||||
| `agents.defaults.sessionHistoryMaxTokens` | `0` (auto) | Per-turn token budget for replay history. `0` auto-derives a budget from context window and output reserve; set a positive number to force a fixed token cap. |
|
||||
| `agents.defaults.sessionFileMaxMessages` | `2000` | Hard cap for on-disk `sessions/*.jsonl` message count. When exceeded, old prefixes are raw-archived into `memory/history.jsonl` and trimmed from the session file. Set to `0` to disable. |
|
||||
|
||||
`sessionTtlMinutes` remains accepted as a legacy alias for backward compatibility, but `idleCompactAfterMinutes` is the preferred config key going forward.
|
||||
|
||||
|
||||
@ -202,9 +202,6 @@ class AgentLoop:
|
||||
timezone: str | None = None,
|
||||
session_ttl_minutes: int = 0,
|
||||
consolidation_ratio: float = 0.5,
|
||||
session_history_max_messages: int | None = None,
|
||||
session_history_max_tokens: int | None = None,
|
||||
session_file_max_messages: int | None = None,
|
||||
hooks: list[AgentHook] | None = None,
|
||||
unified_session: bool = False,
|
||||
disabled_skills: list[str] | None = None,
|
||||
@ -237,21 +234,6 @@ class AgentLoop:
|
||||
if max_tool_result_chars is not None
|
||||
else defaults.max_tool_result_chars
|
||||
)
|
||||
self.session_history_max_messages = (
|
||||
session_history_max_messages
|
||||
if session_history_max_messages is not None
|
||||
else defaults.session_history_max_messages
|
||||
)
|
||||
self.session_history_max_tokens = (
|
||||
session_history_max_tokens
|
||||
if session_history_max_tokens is not None
|
||||
else defaults.session_history_max_tokens
|
||||
)
|
||||
self.session_file_max_messages = (
|
||||
session_file_max_messages
|
||||
if session_file_max_messages is not None
|
||||
else defaults.session_file_max_messages
|
||||
)
|
||||
self.provider_retry_mode = provider_retry_mode
|
||||
self.web_config = web_config or WebToolsConfig()
|
||||
self.exec_config = exec_config or ExecToolConfig()
|
||||
@ -495,10 +477,8 @@ class AgentLoop:
|
||||
return UNIFIED_SESSION_KEY
|
||||
return msg.session_key
|
||||
|
||||
def _history_token_budget(self) -> int:
|
||||
"""Resolve token budget for session history replay."""
|
||||
if self.session_history_max_tokens > 0:
|
||||
return self.session_history_max_tokens
|
||||
def _replay_token_budget(self) -> int:
|
||||
"""Derive a token budget for session history replay from the context window."""
|
||||
if self.context_window_tokens <= 0:
|
||||
return 0
|
||||
max_output = getattr(getattr(self.provider, "generation", None), "max_tokens", 4096)
|
||||
@ -507,36 +487,7 @@ class AgentLoop:
|
||||
except (TypeError, ValueError):
|
||||
reserved_output = 4096
|
||||
budget = self.context_window_tokens - max(1, reserved_output) - 1024
|
||||
if budget > 0:
|
||||
return budget
|
||||
return max(128, self.context_window_tokens // 2)
|
||||
|
||||
def _enforce_session_file_cap(self, session: Session) -> None:
|
||||
"""Bound session.jsonl growth by archiving and trimming old prefixes."""
|
||||
limit = self.session_file_max_messages
|
||||
if limit <= 0 or len(session.messages) <= limit:
|
||||
return
|
||||
|
||||
before = list(session.messages)
|
||||
before_last_consolidated = session.last_consolidated
|
||||
before_count = len(before)
|
||||
session.retain_recent_legal_suffix(limit)
|
||||
dropped_count = before_count - len(session.messages)
|
||||
if dropped_count <= 0:
|
||||
return
|
||||
|
||||
dropped = before[:dropped_count]
|
||||
already_consolidated = min(before_last_consolidated, dropped_count)
|
||||
archive_chunk = dropped[already_consolidated:]
|
||||
if archive_chunk:
|
||||
self.context.memory.raw_archive(archive_chunk)
|
||||
logger.info(
|
||||
"Session file cap hit for {}: dropped {}, raw-archived {}, kept {}",
|
||||
session.key,
|
||||
dropped_count,
|
||||
len(archive_chunk),
|
||||
len(session.messages),
|
||||
)
|
||||
return budget if budget > 0 else max(128, self.context_window_tokens // 2)
|
||||
|
||||
async def _run_agent_loop(
|
||||
self,
|
||||
@ -929,8 +880,7 @@ class AgentLoop:
|
||||
msg.metadata, session_key=key,
|
||||
)
|
||||
history = session.get_history(
|
||||
max_messages=self.session_history_max_messages,
|
||||
max_tokens=self._history_token_budget(),
|
||||
max_tokens=self._replay_token_budget(),
|
||||
include_timestamps=True,
|
||||
)
|
||||
current_role = "assistant" if is_subagent else "user"
|
||||
@ -953,7 +903,7 @@ class AgentLoop:
|
||||
pending_queue=pending_queue,
|
||||
)
|
||||
self._save_turn(session, all_msgs, 1 + len(history))
|
||||
self._enforce_session_file_cap(session)
|
||||
session.enforce_file_cap(on_archive=self.context.memory.raw_archive)
|
||||
self._clear_runtime_checkpoint(session)
|
||||
self.sessions.save(session)
|
||||
self._schedule_background(self.consolidator.maybe_consolidate_by_tokens(session))
|
||||
@ -1017,8 +967,7 @@ class AgentLoop:
|
||||
message_tool.start_turn()
|
||||
|
||||
history = session.get_history(
|
||||
max_messages=self.session_history_max_messages,
|
||||
max_tokens=self._history_token_budget(),
|
||||
max_tokens=self._replay_token_budget(),
|
||||
include_timestamps=True,
|
||||
)
|
||||
|
||||
@ -1108,7 +1057,7 @@ class AgentLoop:
|
||||
# Skip the already-persisted user message when saving the turn
|
||||
save_skip = 1 + len(history) + (1 if user_persisted_early else 0)
|
||||
self._save_turn(session, all_msgs, save_skip)
|
||||
self._enforce_session_file_cap(session)
|
||||
session.enforce_file_cap(on_archive=self.context.memory.raw_archive)
|
||||
self._clear_pending_user_turn(session)
|
||||
self._clear_runtime_checkpoint(session)
|
||||
self.sessions.save(session)
|
||||
|
||||
@ -538,9 +538,6 @@ def serve(
|
||||
disabled_skills=runtime_config.agents.defaults.disabled_skills,
|
||||
session_ttl_minutes=runtime_config.agents.defaults.session_ttl_minutes,
|
||||
consolidation_ratio=runtime_config.agents.defaults.consolidation_ratio,
|
||||
session_history_max_messages=runtime_config.agents.defaults.session_history_max_messages,
|
||||
session_history_max_tokens=runtime_config.agents.defaults.session_history_max_tokens,
|
||||
session_file_max_messages=runtime_config.agents.defaults.session_file_max_messages,
|
||||
tools_config=runtime_config.tools,
|
||||
)
|
||||
|
||||
@ -654,9 +651,6 @@ def _run_gateway(
|
||||
disabled_skills=config.agents.defaults.disabled_skills,
|
||||
session_ttl_minutes=config.agents.defaults.session_ttl_minutes,
|
||||
consolidation_ratio=config.agents.defaults.consolidation_ratio,
|
||||
session_history_max_messages=config.agents.defaults.session_history_max_messages,
|
||||
session_history_max_tokens=config.agents.defaults.session_history_max_tokens,
|
||||
session_file_max_messages=config.agents.defaults.session_file_max_messages,
|
||||
tools_config=config.tools,
|
||||
provider_snapshot_loader=load_provider_snapshot,
|
||||
provider_signature=provider_snapshot.signature,
|
||||
@ -1041,9 +1035,6 @@ def agent(
|
||||
disabled_skills=config.agents.defaults.disabled_skills,
|
||||
session_ttl_minutes=config.agents.defaults.session_ttl_minutes,
|
||||
consolidation_ratio=config.agents.defaults.consolidation_ratio,
|
||||
session_history_max_messages=config.agents.defaults.session_history_max_messages,
|
||||
session_history_max_tokens=config.agents.defaults.session_history_max_tokens,
|
||||
session_file_max_messages=config.agents.defaults.session_file_max_messages,
|
||||
tools_config=config.tools,
|
||||
)
|
||||
restart_notice = consume_restart_notice_from_env()
|
||||
|
||||
@ -97,18 +97,6 @@ class AgentDefaults(Base):
|
||||
validation_alias=AliasChoices("consolidationRatio"),
|
||||
serialization_alias="consolidationRatio",
|
||||
) # Consolidation target ratio (0.5 = 50% of budget retained after compression)
|
||||
session_history_max_messages: int = Field(
|
||||
default=120,
|
||||
ge=0,
|
||||
) # Per-turn session history window for prompt replay (0 = unlimited)
|
||||
session_history_max_tokens: int = Field(
|
||||
default=0,
|
||||
ge=0,
|
||||
) # Per-turn token budget for replay history (0 = auto based on context window)
|
||||
session_file_max_messages: int = Field(
|
||||
default=2000,
|
||||
ge=0,
|
||||
) # Hard cap for on-disk session.jsonl messages (0 = disabled)
|
||||
dream: DreamConfig = Field(default_factory=DreamConfig)
|
||||
|
||||
|
||||
|
||||
@ -85,9 +85,6 @@ class Nanobot:
|
||||
disabled_skills=defaults.disabled_skills,
|
||||
session_ttl_minutes=defaults.session_ttl_minutes,
|
||||
consolidation_ratio=defaults.consolidation_ratio,
|
||||
session_history_max_messages=defaults.session_history_max_messages,
|
||||
session_history_max_tokens=defaults.session_history_max_tokens,
|
||||
session_file_max_messages=defaults.session_file_max_messages,
|
||||
tools_config=config.tools,
|
||||
)
|
||||
return cls(loop)
|
||||
|
||||
@ -20,6 +20,10 @@ from nanobot.utils.helpers import (
|
||||
)
|
||||
|
||||
|
||||
HISTORY_MAX_MESSAGES = 120
|
||||
FILE_MAX_MESSAGES = 2000
|
||||
|
||||
|
||||
@dataclass
|
||||
class Session:
|
||||
"""A conversation session."""
|
||||
@ -70,7 +74,7 @@ class Session:
|
||||
|
||||
def get_history(
|
||||
self,
|
||||
max_messages: int = 500,
|
||||
max_messages: int = HISTORY_MAX_MESSAGES,
|
||||
*,
|
||||
max_tokens: int = 0,
|
||||
include_timestamps: bool = False,
|
||||
@ -201,6 +205,36 @@ class Session:
|
||||
self.last_consolidated = max(0, self.last_consolidated - dropped)
|
||||
self.updated_at = datetime.now()
|
||||
|
||||
def enforce_file_cap(
    self,
    on_archive: Any = None,
    limit: int = FILE_MAX_MESSAGES,
) -> None:
    """Bound session message growth by archiving and trimming old prefixes.

    When the session holds more than ``limit`` messages, it is trimmed via
    ``retain_recent_legal_suffix(limit)``. The messages that were dropped and
    that lie beyond the pre-trim ``last_consolidated`` offset are handed to
    ``on_archive`` so they are not lost silently.

    Args:
        on_archive: Optional callable invoked once with the list of dropped,
            not-yet-consolidated messages. When it is ``None`` (or otherwise
            not callable) the dropped messages are discarded without being
            archived.
        limit: Maximum number of messages to keep. A value ``<= 0`` disables
            the cap entirely.
    """
    if limit <= 0 or len(self.messages) <= limit:
        return

    # Snapshot before trimming: the trim mutates self.messages in place.
    # NOTE(review): it presumably also shifts last_consolidated — confirm
    # against retain_recent_legal_suffix.
    before = list(self.messages)
    before_last_consolidated = self.last_consolidated
    self.retain_recent_legal_suffix(limit)

    dropped_count = len(before) - len(self.messages)
    if dropped_count <= 0:
        return

    # Assumes the trim removes messages from the front of the list.
    dropped = before[:dropped_count]
    # Messages below the old consolidation offset are skipped; only the
    # remainder of the dropped prefix is passed to the archive callback.
    already_consolidated = min(before_last_consolidated, dropped_count)
    archive_chunk = dropped[already_consolidated:]

    # Track what was actually archived so the log line below does not claim
    # an archive happened when no callback was supplied.
    archived_count = 0
    if archive_chunk and callable(on_archive):
        on_archive(archive_chunk)
        archived_count = len(archive_chunk)

    logger.info(
        "Session file cap hit for {}: dropped {}, raw-archived {}, kept {}",
        self.key,
        dropped_count,
        archived_count,
        len(self.messages),
    )
|
||||
|
||||
|
||||
class SessionManager:
|
||||
"""
|
||||
|
||||
@ -18,9 +18,6 @@ from nanobot.providers.base import LLMResponse
|
||||
def _make_loop(
|
||||
tmp_path: Path,
|
||||
session_ttl_minutes: int = 15,
|
||||
session_history_max_messages: int | None = None,
|
||||
session_history_max_tokens: int | None = None,
|
||||
session_file_max_messages: int | None = None,
|
||||
) -> AgentLoop:
|
||||
"""Create a minimal AgentLoop for testing."""
|
||||
bus = MessageBus()
|
||||
@ -36,9 +33,6 @@ def _make_loop(
|
||||
model="test-model",
|
||||
context_window_tokens=128_000,
|
||||
session_ttl_minutes=session_ttl_minutes,
|
||||
session_history_max_messages=session_history_max_messages,
|
||||
session_history_max_tokens=session_history_max_tokens,
|
||||
session_file_max_messages=session_file_max_messages,
|
||||
)
|
||||
loop.tools.get_definitions = MagicMock(return_value=[])
|
||||
return loop
|
||||
@ -81,33 +75,11 @@ class TestSessionTTLConfig:
|
||||
assert data["idleCompactAfterMinutes"] == 30
|
||||
assert "sessionTtlMinutes" not in data
|
||||
|
||||
def test_default_session_history_window(self):
|
||||
"""Session history replay should be capped by default."""
|
||||
defaults = AgentDefaults()
|
||||
assert defaults.session_history_max_messages == 120
|
||||
|
||||
def test_default_session_history_token_budget_auto(self):
|
||||
defaults = AgentDefaults()
|
||||
assert defaults.session_history_max_tokens == 0
|
||||
|
||||
def test_default_session_file_cap(self):
|
||||
defaults = AgentDefaults()
|
||||
assert defaults.session_file_max_messages == 2000
|
||||
|
||||
def test_serializes_session_history_window(self):
|
||||
"""Config should expose sessionHistoryMaxMessages in JSON output."""
|
||||
defaults = AgentDefaults(session_history_max_messages=64)
|
||||
data = defaults.model_dump(mode="json", by_alias=True)
|
||||
assert data["sessionHistoryMaxMessages"] == 64
|
||||
|
||||
def test_serializes_history_token_budget_and_file_cap(self):
|
||||
defaults = AgentDefaults(
|
||||
session_history_max_tokens=2048,
|
||||
session_file_max_messages=1024,
|
||||
)
|
||||
data = defaults.model_dump(mode="json", by_alias=True)
|
||||
assert data["sessionHistoryMaxTokens"] == 2048
|
||||
assert data["sessionFileMaxMessages"] == 1024
|
||||
def test_session_history_and_file_cap_are_internal_constants(self):
|
||||
"""Session history/file cap should be internal constants, not config fields."""
|
||||
from nanobot.session.manager import HISTORY_MAX_MESSAGES, FILE_MAX_MESSAGES
|
||||
assert HISTORY_MAX_MESSAGES == 120
|
||||
assert FILE_MAX_MESSAGES == 2000
|
||||
|
||||
|
||||
class TestAgentLoopTTLParam:
|
||||
@ -123,23 +95,10 @@ class TestAgentLoopTTLParam:
|
||||
loop = _make_loop(tmp_path, session_ttl_minutes=0)
|
||||
assert loop.auto_compact._ttl == 0
|
||||
|
||||
def test_loop_stores_history_window(self, tmp_path):
|
||||
"""AgentLoop should store configured session history max_messages."""
|
||||
loop = _make_loop(tmp_path, session_history_max_messages=42)
|
||||
assert loop.session_history_max_messages == 42
|
||||
|
||||
def test_loop_stores_history_token_budget(self, tmp_path):
|
||||
loop = _make_loop(tmp_path, session_history_max_tokens=2048)
|
||||
assert loop.session_history_max_tokens == 2048
|
||||
|
||||
def test_loop_stores_session_file_cap(self, tmp_path):
|
||||
loop = _make_loop(tmp_path, session_file_max_messages=512)
|
||||
assert loop.session_file_max_messages == 512
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_process_message_reads_history_with_configured_cap(self, tmp_path):
|
||||
"""_process_message should use session_history_max_messages, not unlimited history."""
|
||||
loop = _make_loop(tmp_path, session_history_max_messages=7)
|
||||
async def test_process_message_reads_history_with_token_budget(self, tmp_path):
|
||||
"""_process_message should pass an auto-derived token budget to get_history."""
|
||||
loop = _make_loop(tmp_path)
|
||||
session = loop.sessions.get_or_create("cli:direct")
|
||||
session.get_history = MagicMock(return_value=[])
|
||||
loop.context.build_messages = MagicMock(return_value=[])
|
||||
@ -155,38 +114,13 @@ class TestAgentLoopTTLParam:
|
||||
await loop._process_message(msg)
|
||||
session.get_history.assert_called_once()
|
||||
kwargs = session.get_history.call_args.kwargs
|
||||
assert kwargs["max_messages"] == 7
|
||||
assert isinstance(kwargs.get("max_tokens"), int)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_process_message_reads_history_with_token_budget(self, tmp_path):
|
||||
loop = _make_loop(
|
||||
tmp_path,
|
||||
session_history_max_messages=7,
|
||||
session_history_max_tokens=333,
|
||||
)
|
||||
session = loop.sessions.get_or_create("cli:direct")
|
||||
session.get_history = MagicMock(return_value=[])
|
||||
loop.context.build_messages = MagicMock(return_value=[])
|
||||
loop._run_agent_loop = AsyncMock(return_value=("ok", [], [], "stop", False))
|
||||
loop._save_turn = MagicMock()
|
||||
|
||||
msg = InboundMessage(
|
||||
channel="cli",
|
||||
sender_id="u1",
|
||||
chat_id="direct",
|
||||
content="hello",
|
||||
)
|
||||
await loop._process_message(msg)
|
||||
session.get_history.assert_called_once_with(
|
||||
max_messages=7,
|
||||
max_tokens=333,
|
||||
include_timestamps=True,
|
||||
)
|
||||
assert kwargs["max_tokens"] > 0
|
||||
assert kwargs["include_timestamps"] is True
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_session_file_cap_archives_and_trims_old_messages(self, tmp_path):
|
||||
loop = _make_loop(tmp_path, session_file_max_messages=6)
|
||||
loop = _make_loop(tmp_path)
|
||||
loop.context.memory.raw_archive = MagicMock()
|
||||
|
||||
for i in range(4):
|
||||
@ -199,35 +133,35 @@ class TestAgentLoopTTLParam:
|
||||
await loop._process_message(msg)
|
||||
|
||||
session = loop.sessions.get_or_create("cli:direct")
|
||||
assert len(session.messages) <= 6
|
||||
assert loop.context.memory.raw_archive.called
|
||||
from nanobot.session.manager import FILE_MAX_MESSAGES
|
||||
assert len(session.messages) <= FILE_MAX_MESSAGES
|
||||
|
||||
def test_session_file_cap_skips_raw_archive_when_dropped_prefix_is_already_consolidated(self, tmp_path):
|
||||
loop = _make_loop(tmp_path, session_file_max_messages=4)
|
||||
loop.context.memory.raw_archive = MagicMock()
|
||||
session = loop.sessions.get_or_create("cli:direct")
|
||||
def test_session_enforce_file_cap_skips_archive_when_dropped_prefix_already_consolidated(self, tmp_path):
|
||||
from nanobot.session.manager import Session
|
||||
archive_fn = MagicMock()
|
||||
session = Session(key="cli:direct")
|
||||
for i in range(8):
|
||||
session.add_message("user", f"u{i}")
|
||||
session.last_consolidated = 6
|
||||
|
||||
loop._enforce_session_file_cap(session)
|
||||
session.enforce_file_cap(on_archive=archive_fn, limit=4)
|
||||
|
||||
assert len(session.messages) <= 4
|
||||
loop.context.memory.raw_archive.assert_not_called()
|
||||
archive_fn.assert_not_called()
|
||||
|
||||
def test_session_file_cap_archives_only_unconsolidated_part_of_dropped_prefix(self, tmp_path):
|
||||
loop = _make_loop(tmp_path, session_file_max_messages=4)
|
||||
loop.context.memory.raw_archive = MagicMock()
|
||||
session = loop.sessions.get_or_create("cli:direct")
|
||||
def test_session_enforce_file_cap_archives_only_unconsolidated_dropped_prefix(self, tmp_path):
|
||||
from nanobot.session.manager import Session
|
||||
archive_fn = MagicMock()
|
||||
session = Session(key="cli:direct")
|
||||
for i in range(8):
|
||||
session.add_message("user", f"u{i}")
|
||||
session.last_consolidated = 2
|
||||
|
||||
loop._enforce_session_file_cap(session)
|
||||
session.enforce_file_cap(on_archive=archive_fn, limit=4)
|
||||
|
||||
assert len(session.messages) <= 4
|
||||
loop.context.memory.raw_archive.assert_called_once()
|
||||
archived = loop.context.memory.raw_archive.call_args.args[0]
|
||||
archive_fn.assert_called_once()
|
||||
archived = archive_fn.call_args.args[0]
|
||||
assert [m["content"] for m in archived] == ["u2", "u3"]
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user