diff --git a/docs/configuration.md b/docs/configuration.md index b04a98222..153cbc959 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -717,10 +717,7 @@ When a user is idle for longer than a configured threshold, nanobot **proactivel { "agents": { "defaults": { - "idleCompactAfterMinutes": 15, - "sessionHistoryMaxMessages": 120, - "sessionHistoryMaxTokens": 0, - "sessionFileMaxMessages": 2000 + "idleCompactAfterMinutes": 15 } } } @@ -729,9 +726,6 @@ When a user is idle for longer than a configured threshold, nanobot **proactivel | Option | Default | Description | |--------|---------|-------------| | `agents.defaults.idleCompactAfterMinutes` | `0` (disabled) | Minutes of idle time before auto-compaction starts. Set to `0` to disable. Recommended: `15` — close to a typical LLM KV cache expiry window, so stale sessions get compacted before the user returns. | -| `agents.defaults.sessionHistoryMaxMessages` | `120` | Per-turn max number of session messages included in prompt replay. Set to `0` for unlimited history. | -| `agents.defaults.sessionHistoryMaxTokens` | `0` (auto) | Per-turn token budget for replay history. `0` auto-derives a budget from context window and output reserve; set a positive number to force a fixed token cap. | -| `agents.defaults.sessionFileMaxMessages` | `2000` | Hard cap for on-disk `sessions/*.jsonl` message count. When exceeded, old prefixes are raw-archived into `memory/history.jsonl` and trimmed from the session file. Set to `0` to disable. | `sessionTtlMinutes` remains accepted as a legacy alias for backward compatibility, but `idleCompactAfterMinutes` is the preferred config key going forward. diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py index 3c936a85f..0f7dfd90b 100644 --- a/nanobot/agent/loop.py +++ b/nanobot/agent/loop.py @@ -202,9 +202,6 @@ class AgentLoop: timezone: str | None = None, session_ttl_minutes: int = 0, consolidation_ratio: float = 0.5, - session_history_max_messages: int | None = None, - session_history_max_tokens: int | None = None, - session_file_max_messages: int | None = None, hooks: list[AgentHook] | None = None, unified_session: bool = False, disabled_skills: list[str] | None = None, @@ -237,21 +234,6 @@ class AgentLoop: if max_tool_result_chars is not None else defaults.max_tool_result_chars ) - self.session_history_max_messages = ( - session_history_max_messages - if session_history_max_messages is not None - else defaults.session_history_max_messages - ) - self.session_history_max_tokens = ( - session_history_max_tokens - if session_history_max_tokens is not None - else defaults.session_history_max_tokens - ) - self.session_file_max_messages = ( - session_file_max_messages - if session_file_max_messages is not None - else defaults.session_file_max_messages - ) self.provider_retry_mode = provider_retry_mode self.web_config = web_config or WebToolsConfig() self.exec_config = exec_config or ExecToolConfig() @@ -495,10 +477,8 @@ class AgentLoop: return UNIFIED_SESSION_KEY return msg.session_key - def _history_token_budget(self) -> int: - """Resolve token budget for session history replay.""" - if self.session_history_max_tokens > 0: - return self.session_history_max_tokens + def _replay_token_budget(self) -> int: + """Derive a token budget for session history replay from the context window.""" if self.context_window_tokens <= 0: return 0 max_output = getattr(getattr(self.provider, "generation", None), "max_tokens", 4096) @@ -507,36 +487,7 @@ class AgentLoop: except (TypeError, ValueError): reserved_output = 4096 budget = self.context_window_tokens - max(1, reserved_output) - 1024 - if budget > 0: - return budget - return max(128, self.context_window_tokens // 2) - - def _enforce_session_file_cap(self, session: Session) -> None: - """Bound session.jsonl growth by archiving and trimming old prefixes.""" - limit = self.session_file_max_messages - if limit <= 0 or len(session.messages) <= limit: - return - - before = list(session.messages) - before_last_consolidated = session.last_consolidated - before_count = len(before) - session.retain_recent_legal_suffix(limit) - dropped_count = before_count - len(session.messages) - if dropped_count <= 0: - return - - dropped = before[:dropped_count] - already_consolidated = min(before_last_consolidated, dropped_count) - archive_chunk = dropped[already_consolidated:] - if archive_chunk: - self.context.memory.raw_archive(archive_chunk) - logger.info( - "Session file cap hit for {}: dropped {}, raw-archived {}, kept {}", - session.key, - dropped_count, - len(archive_chunk), - len(session.messages), - ) + return budget if budget > 0 else max(128, self.context_window_tokens // 2) async def _run_agent_loop( self, @@ -929,8 +880,7 @@ class AgentLoop: msg.metadata, session_key=key, ) history = session.get_history( - max_messages=self.session_history_max_messages, - max_tokens=self._history_token_budget(), + max_tokens=self._replay_token_budget(), include_timestamps=True, ) current_role = "assistant" if is_subagent else "user" @@ -953,7 +903,7 @@ class AgentLoop: pending_queue=pending_queue, ) self._save_turn(session, all_msgs, 1 + len(history)) - self._enforce_session_file_cap(session) + session.enforce_file_cap(on_archive=self.context.memory.raw_archive) self._clear_runtime_checkpoint(session) self.sessions.save(session) self._schedule_background(self.consolidator.maybe_consolidate_by_tokens(session)) @@ -1017,8 +967,7 @@ class AgentLoop: message_tool.start_turn() history = session.get_history( - max_messages=self.session_history_max_messages, - max_tokens=self._history_token_budget(), + max_tokens=self._replay_token_budget(), include_timestamps=True, ) @@ -1108,7 +1057,7 @@ class AgentLoop: # Skip the already-persisted user message when saving the turn save_skip = 1 + len(history) + (1 if user_persisted_early else 0) self._save_turn(session, all_msgs, save_skip) - self._enforce_session_file_cap(session) + session.enforce_file_cap(on_archive=self.context.memory.raw_archive) self._clear_pending_user_turn(session) self._clear_runtime_checkpoint(session) self.sessions.save(session) diff --git a/nanobot/cli/commands.py b/nanobot/cli/commands.py index 05de44998..2b911d756 100644 --- a/nanobot/cli/commands.py +++ b/nanobot/cli/commands.py @@ -538,9 +538,6 @@ def serve( disabled_skills=runtime_config.agents.defaults.disabled_skills, session_ttl_minutes=runtime_config.agents.defaults.session_ttl_minutes, consolidation_ratio=runtime_config.agents.defaults.consolidation_ratio, - session_history_max_messages=runtime_config.agents.defaults.session_history_max_messages, - session_history_max_tokens=runtime_config.agents.defaults.session_history_max_tokens, - session_file_max_messages=runtime_config.agents.defaults.session_file_max_messages, tools_config=runtime_config.tools, ) @@ -654,9 +651,6 @@ def _run_gateway( disabled_skills=config.agents.defaults.disabled_skills, session_ttl_minutes=config.agents.defaults.session_ttl_minutes, consolidation_ratio=config.agents.defaults.consolidation_ratio, - session_history_max_messages=config.agents.defaults.session_history_max_messages, - session_history_max_tokens=config.agents.defaults.session_history_max_tokens, - session_file_max_messages=config.agents.defaults.session_file_max_messages, tools_config=config.tools, provider_snapshot_loader=load_provider_snapshot, provider_signature=provider_snapshot.signature, @@ -1041,9 +1035,6 @@ def agent( disabled_skills=config.agents.defaults.disabled_skills, session_ttl_minutes=config.agents.defaults.session_ttl_minutes, consolidation_ratio=config.agents.defaults.consolidation_ratio, - session_history_max_messages=config.agents.defaults.session_history_max_messages, - session_history_max_tokens=config.agents.defaults.session_history_max_tokens, - session_file_max_messages=config.agents.defaults.session_file_max_messages, tools_config=config.tools, ) restart_notice = consume_restart_notice_from_env() diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py index 3feed668e..e1f91aeb0 100644 --- a/nanobot/config/schema.py +++ b/nanobot/config/schema.py @@ -97,18 +97,6 @@ class AgentDefaults(Base): validation_alias=AliasChoices("consolidationRatio"), serialization_alias="consolidationRatio", ) # Consolidation target ratio (0.5 = 50% of budget retained after compression) - session_history_max_messages: int = Field( - default=120, - ge=0, - ) # Per-turn session history window for prompt replay (0 = unlimited) - session_history_max_tokens: int = Field( - default=0, - ge=0, - ) # Per-turn token budget for replay history (0 = auto based on context window) - session_file_max_messages: int = Field( - default=2000, - ge=0, - ) # Hard cap for on-disk session.jsonl messages (0 = disabled) dream: DreamConfig = Field(default_factory=DreamConfig) diff --git a/nanobot/nanobot.py b/nanobot/nanobot.py index 53ec718d2..d2bff97d7 100644 --- a/nanobot/nanobot.py +++ b/nanobot/nanobot.py @@ -85,9 +85,6 @@ class Nanobot: disabled_skills=defaults.disabled_skills, session_ttl_minutes=defaults.session_ttl_minutes, consolidation_ratio=defaults.consolidation_ratio, - session_history_max_messages=defaults.session_history_max_messages, - session_history_max_tokens=defaults.session_history_max_tokens, - session_file_max_messages=defaults.session_file_max_messages, tools_config=config.tools, ) return cls(loop) diff --git a/nanobot/session/manager.py b/nanobot/session/manager.py index 93d96c607..095b3f550 100644 --- a/nanobot/session/manager.py +++ b/nanobot/session/manager.py @@ -20,6 +20,10 @@ from nanobot.utils.helpers import ( ) +HISTORY_MAX_MESSAGES = 120 +FILE_MAX_MESSAGES = 2000 + + @dataclass class Session: """A conversation session.""" @@ -70,7 +74,7 @@ class Session: def get_history( self, - max_messages: int = 500, + max_messages: int = HISTORY_MAX_MESSAGES, *, max_tokens: int = 0, include_timestamps: bool = False, @@ -201,6 +205,36 @@ class Session: self.last_consolidated = max(0, self.last_consolidated - dropped) self.updated_at = datetime.now() + def enforce_file_cap( + self, + on_archive: Any = None, + limit: int = FILE_MAX_MESSAGES, + ) -> None: + """Bound session message growth by archiving and trimming old prefixes.""" + if limit <= 0 or len(self.messages) <= limit: + return + + before = list(self.messages) + before_last_consolidated = self.last_consolidated + before_count = len(before) + self.retain_recent_legal_suffix(limit) + dropped_count = before_count - len(self.messages) + if dropped_count <= 0: + return + + dropped = before[:dropped_count] + already_consolidated = min(before_last_consolidated, dropped_count) + archive_chunk = dropped[already_consolidated:] + if archive_chunk and on_archive: + on_archive(archive_chunk) + logger.info( + "Session file cap hit for {}: dropped {}, raw-archived {}, kept {}", + self.key, + dropped_count, + len(archive_chunk), + len(self.messages), + ) + class SessionManager: """ diff --git a/tests/agent/test_auto_compact.py b/tests/agent/test_auto_compact.py index b3b5a07db..12578223b 100644 --- a/tests/agent/test_auto_compact.py +++ b/tests/agent/test_auto_compact.py @@ -18,9 +18,6 @@ from nanobot.providers.base import LLMResponse def _make_loop( tmp_path: Path, session_ttl_minutes: int = 15, - session_history_max_messages: int | None = None, - session_history_max_tokens: int | None = None, - session_file_max_messages: int | None = None, ) -> AgentLoop: """Create a minimal AgentLoop for testing.""" bus = MessageBus() @@ -36,9 +33,6 @@ def _make_loop( model="test-model", context_window_tokens=128_000, session_ttl_minutes=session_ttl_minutes, - session_history_max_messages=session_history_max_messages, - session_history_max_tokens=session_history_max_tokens, - session_file_max_messages=session_file_max_messages, ) loop.tools.get_definitions = MagicMock(return_value=[]) return loop @@ -81,33 +75,11 @@ class TestSessionTTLConfig: assert data["idleCompactAfterMinutes"] == 30 assert "sessionTtlMinutes" not in data - def test_default_session_history_window(self): - """Session history replay should be capped by default.""" - defaults = AgentDefaults() - assert defaults.session_history_max_messages == 120 - - def test_default_session_history_token_budget_auto(self): - defaults = AgentDefaults() - assert defaults.session_history_max_tokens == 0 - - def test_default_session_file_cap(self): - defaults = AgentDefaults() - assert defaults.session_file_max_messages == 2000 - - def test_serializes_session_history_window(self): - """Config should expose sessionHistoryMaxMessages in JSON output.""" - defaults = AgentDefaults(session_history_max_messages=64) - data = defaults.model_dump(mode="json", by_alias=True) - assert data["sessionHistoryMaxMessages"] == 64 - - def test_serializes_history_token_budget_and_file_cap(self): - defaults = AgentDefaults( - session_history_max_tokens=2048, - session_file_max_messages=1024, - ) - data = defaults.model_dump(mode="json", by_alias=True) - assert data["sessionHistoryMaxTokens"] == 2048 - assert data["sessionFileMaxMessages"] == 1024 + def test_session_history_and_file_cap_are_internal_constants(self): + """Session history/file cap should be internal constants, not config fields.""" + from nanobot.session.manager import HISTORY_MAX_MESSAGES, FILE_MAX_MESSAGES + assert HISTORY_MAX_MESSAGES == 120 + assert FILE_MAX_MESSAGES == 2000 class TestAgentLoopTTLParam: @@ -123,23 +95,10 @@ class TestAgentLoopTTLParam: loop = _make_loop(tmp_path, session_ttl_minutes=0) assert loop.auto_compact._ttl == 0 - def test_loop_stores_history_window(self, tmp_path): - """AgentLoop should store configured session history max_messages.""" - loop = _make_loop(tmp_path, session_history_max_messages=42) - assert loop.session_history_max_messages == 42 - - def test_loop_stores_history_token_budget(self, tmp_path): - loop = _make_loop(tmp_path, session_history_max_tokens=2048) - assert loop.session_history_max_tokens == 2048 - - def test_loop_stores_session_file_cap(self, tmp_path): - loop = _make_loop(tmp_path, session_file_max_messages=512) - assert loop.session_file_max_messages == 512 - @pytest.mark.asyncio - async def test_process_message_reads_history_with_configured_cap(self, tmp_path): - """_process_message should use session_history_max_messages, not unlimited history.""" - loop = _make_loop(tmp_path, session_history_max_messages=7) + async def test_process_message_reads_history_with_token_budget(self, tmp_path): + """_process_message should pass an auto-derived token budget to get_history.""" + loop = _make_loop(tmp_path) session = loop.sessions.get_or_create("cli:direct") session.get_history = MagicMock(return_value=[]) loop.context.build_messages = MagicMock(return_value=[]) @@ -155,38 +114,13 @@ class TestAgentLoopTTLParam: await loop._process_message(msg) session.get_history.assert_called_once() kwargs = session.get_history.call_args.kwargs - assert kwargs["max_messages"] == 7 assert isinstance(kwargs.get("max_tokens"), int) - - @pytest.mark.asyncio - async def test_process_message_reads_history_with_token_budget(self, tmp_path): - loop = _make_loop( - tmp_path, - session_history_max_messages=7, - session_history_max_tokens=333, - ) - session = loop.sessions.get_or_create("cli:direct") - session.get_history = MagicMock(return_value=[]) - loop.context.build_messages = MagicMock(return_value=[]) - loop._run_agent_loop = AsyncMock(return_value=("ok", [], [], "stop", False)) - loop._save_turn = MagicMock() - - msg = InboundMessage( - channel="cli", - sender_id="u1", - chat_id="direct", - content="hello", - ) - await loop._process_message(msg) - session.get_history.assert_called_once_with( - max_messages=7, - max_tokens=333, - include_timestamps=True, - ) + assert kwargs["max_tokens"] > 0 + assert kwargs["include_timestamps"] is True @pytest.mark.asyncio async def test_session_file_cap_archives_and_trims_old_messages(self, tmp_path): - loop = _make_loop(tmp_path, session_file_max_messages=6) + loop = _make_loop(tmp_path) loop.context.memory.raw_archive = MagicMock() for i in range(4): @@ -199,35 +133,35 @@ class TestAgentLoopTTLParam: await loop._process_message(msg) session = loop.sessions.get_or_create("cli:direct") - assert len(session.messages) <= 6 - assert loop.context.memory.raw_archive.called + from nanobot.session.manager import FILE_MAX_MESSAGES + assert len(session.messages) <= FILE_MAX_MESSAGES - def test_session_file_cap_skips_raw_archive_when_dropped_prefix_is_already_consolidated(self, tmp_path): - loop = _make_loop(tmp_path, session_file_max_messages=4) - loop.context.memory.raw_archive = MagicMock() - session = loop.sessions.get_or_create("cli:direct") + def test_session_enforce_file_cap_skips_archive_when_dropped_prefix_already_consolidated(self, tmp_path): + from nanobot.session.manager import Session + archive_fn = MagicMock() + session = Session(key="cli:direct") for i in range(8): session.add_message("user", f"u{i}") session.last_consolidated = 6 - loop._enforce_session_file_cap(session) + session.enforce_file_cap(on_archive=archive_fn, limit=4) assert len(session.messages) <= 4 - loop.context.memory.raw_archive.assert_not_called() + archive_fn.assert_not_called() - def test_session_file_cap_archives_only_unconsolidated_part_of_dropped_prefix(self, tmp_path): - loop = _make_loop(tmp_path, session_file_max_messages=4) - loop.context.memory.raw_archive = MagicMock() - session = loop.sessions.get_or_create("cli:direct") + def test_session_enforce_file_cap_archives_only_unconsolidated_dropped_prefix(self, tmp_path): + from nanobot.session.manager import Session + archive_fn = MagicMock() + session = Session(key="cli:direct") for i in range(8): session.add_message("user", f"u{i}") session.last_consolidated = 2 - loop._enforce_session_file_cap(session) + session.enforce_file_cap(on_archive=archive_fn, limit=4) assert len(session.messages) <= 4 - loop.context.memory.raw_archive.assert_called_once() - archived = loop.context.memory.raw_archive.call_args.args[0] + archive_fn.assert_called_once() + archived = archive_fn.call_args.args[0] assert [m["content"] for m in archived] == ["u2", "u3"]