fix(memory): harden consolidation with try/except on token estimation and chunk size cap

- Wrap both token estimation calls in try/except so that an exception raised
  during estimation is logged and ends the cycle early instead of crashing it
- Add _MAX_CHUNK_MESSAGES = 60 to cap messages per consolidation round,
  avoiding oversized chunks being sent to the consolidation LLM
- Improve idle log to include unconsolidated message count for easier debugging

These are purely defensive improvements: behaviour is unchanged for sessions
where token estimation succeeds and each chunk holds at most 60 messages.
This commit is contained in:
comadreja 2026-04-09 11:27:15 -05:00 committed by Xubin Ren
parent 363a0704db
commit bfe53ebb10

View File

@ -347,6 +347,7 @@ class Consolidator:
"""Lightweight consolidation: summarizes evicted messages into history.jsonl.""" """Lightweight consolidation: summarizes evicted messages into history.jsonl."""
_MAX_CONSOLIDATION_ROUNDS = 5 _MAX_CONSOLIDATION_ROUNDS = 5
_MAX_CHUNK_MESSAGES = 60 # hard cap per consolidation round
_SAFETY_BUFFER = 1024 # extra headroom for tokenizer estimation drift _SAFETY_BUFFER = 1024 # extra headroom for tokenizer estimation drift
@ -461,16 +462,22 @@ class Consolidator:
async with lock: async with lock:
budget = self.context_window_tokens - self.max_completion_tokens - self._SAFETY_BUFFER budget = self.context_window_tokens - self.max_completion_tokens - self._SAFETY_BUFFER
target = budget // 2 target = budget // 2
try:
estimated, source = self.estimate_session_prompt_tokens(session) estimated, source = self.estimate_session_prompt_tokens(session)
except Exception:
logger.exception("Token estimation failed for {}", session.key)
estimated, source = 0, "error"
if estimated <= 0: if estimated <= 0:
return return
if estimated < budget: if estimated < budget:
unconsolidated_count = len(session.messages) - session.last_consolidated
logger.debug( logger.debug(
"Token consolidation idle {}: {}/{} via {}", "Token consolidation idle {}: {}/{} via {}, msgs={}",
session.key, session.key,
estimated, estimated,
self.context_window_tokens, self.context_window_tokens,
source, source,
unconsolidated_count,
) )
return return
@ -492,6 +499,10 @@ class Consolidator:
if not chunk: if not chunk:
return return
if len(chunk) > self._MAX_CHUNK_MESSAGES:
chunk = chunk[:self._MAX_CHUNK_MESSAGES]
end_idx = session.last_consolidated + len(chunk)
logger.info( logger.info(
"Token consolidation round {} for {}: {}/{} via {}, chunk={} msgs", "Token consolidation round {} for {}: {}/{} via {}, chunk={} msgs",
round_num, round_num,
@ -506,7 +517,11 @@ class Consolidator:
session.last_consolidated = end_idx session.last_consolidated = end_idx
self.sessions.save(session) self.sessions.save(session)
try:
estimated, source = self.estimate_session_prompt_tokens(session) estimated, source = self.estimate_session_prompt_tokens(session)
except Exception:
logger.exception("Token estimation failed for {}", session.key)
estimated, source = 0, "error"
if estimated <= 0: if estimated <= 0:
return return