fix(status): correct context percentage calculation and sync consolidator

- Pass resolved self.context_window_tokens to Consolidator instead of
  raw parameter that could be None, preventing consolidation failures
- Calculate percentage against input budget (ctx - max_completion - 1024)
  instead of raw context window, consistent with Consolidator/snip formulas
- Pass actual max_completion_tokens from provider to build_status_content
- Cap percentage display at 999 to prevent runaway values
- Add tests for budget-based percentage and cap behavior
This commit is contained in:
chengyongru 2026-04-16 14:37:01 +08:00 committed by Xubin Ren
parent 92a5125108
commit e1fdca7d40
5 changed files with 42 additions and 5 deletions

View File

@ -226,7 +226,7 @@ class AgentLoop:
provider=provider,
model=self.model,
sessions=self.sessions,
context_window_tokens=context_window_tokens,
context_window_tokens=self.context_window_tokens,
build_messages=self.context.build_messages,
get_tool_definitions=self.tools.get_definitions,
max_completion_tokens=provider.generation.max_tokens,

View File

@ -91,6 +91,9 @@ async def cmd_status(ctx: CommandContext) -> OutboundMessage:
context_tokens_estimate=ctx_est,
search_usage_text=search_usage_text,
active_task_count=task_count,
max_completion_tokens=getattr(
getattr(loop.provider, "generation", None), "max_tokens", 8192
),
),
metadata={**dict(ctx.msg.metadata or {}), "render_as": "text"},
)

View File

@ -401,6 +401,7 @@ def build_status_content(
context_tokens_estimate: int,
search_usage_text: str | None = None,
active_task_count: int = 0,
max_completion_tokens: int = 8192,
) -> str:
"""Build a human-readable runtime status snapshot.
@ -419,7 +420,9 @@ def build_status_content(
last_out = last_usage.get("completion_tokens", 0)
cached = last_usage.get("cached_tokens", 0)
ctx_total = max(context_window_tokens, 0)
ctx_pct = int((context_tokens_estimate / ctx_total) * 100) if ctx_total > 0 else 0
# Budget mirrors Consolidator formula: ctx_window - max_completion - _SAFETY_BUFFER
ctx_budget = max(ctx_total - int(max_completion_tokens) - 1024, 1)
ctx_pct = min(int((context_tokens_estimate / ctx_budget) * 100), 999) if ctx_budget > 0 else 0
ctx_used_str = f"{context_tokens_estimate // 1000}k" if context_tokens_estimate >= 1000 else str(context_tokens_estimate)
ctx_total_str = f"{ctx_total // 1000}k" if ctx_total > 0 else "n/a"
token_line = f"\U0001f4ca Tokens: {last_in} in / {last_out} out"
@ -429,7 +432,7 @@ def build_status_content(
f"\U0001f408 nanobot v{version}",
f"\U0001f9e0 Model: {model}",
token_line,
f"\U0001f4da Context: {ctx_used_str}/{ctx_total_str} ({ctx_pct}%)",
f"\U0001f4da Context: {ctx_used_str}/{ctx_total_str} ({ctx_pct}% of input budget)",
f"\U0001f4ac Session: {session_msg_count} messages",
f"\u23f1 Uptime: {uptime}",
f"\u26a1 Tasks: {active_task_count} active",

View File

@ -149,7 +149,7 @@ class TestRestartCommand:
assert response is not None
assert "Model: test-model" in response.content
assert "Tokens: 0 in / 0 out" in response.content
assert "Context: 20k/65k (31%)" in response.content
assert "Context: 20k/65k (31% of input budget)" in response.content
assert "Session: 3 messages" in response.content
assert "Uptime: 2m 5s" in response.content
assert "Tasks: 0 active" in response.content
@ -213,7 +213,7 @@ class TestRestartCommand:
assert response is not None
assert "Tokens: 1200 in / 34 out" in response.content
assert "Context: 1k/65k (1%)" in response.content
assert "Context: 1k/65k (1% of input budget)" in response.content
assert "Tasks: 0 active" in response.content
@pytest.mark.asyncio

View File

@ -59,3 +59,34 @@ def test_status_100_percent_cached():
context_tokens_estimate=3000,
)
assert "100% cached" in content
def test_status_context_pct_uses_budget_not_total():
    """The context percentage is relative to the input budget, not the full window.

    With a 128000-token window and 8192 completion tokens reserved, the
    budget is 128000 - 8192 - 1024 = 118784.  An estimate of 120000 tokens
    is under the raw window (~93%) but over the budget:
    int(120000 / 118784 * 100) == 101.
    """
    status_kwargs = {
        "version": "0.1.0",
        "model": "test",
        "start_time": 1000000.0,
        "last_usage": {"prompt_tokens": 2000, "completion_tokens": 300},
        "context_window_tokens": 128000,
        "session_msg_count": 10,
        "context_tokens_estimate": 120000,
        "max_completion_tokens": 8192,
    }
    rendered = build_status_content(**status_kwargs)
    expected_fragment = "(101% of input budget)"
    assert expected_fragment in rendered
def test_status_context_pct_capped_at_999():
    """A usage estimate far beyond the budget is displayed as 999%, never higher.

    Budget here is 10000 - 4096 - 1024 = 4880, so 100000 estimated tokens
    would be roughly 2049% uncapped; the status line must clamp it to 999.
    """
    status_kwargs = {
        "version": "0.1.0",
        "model": "test",
        "start_time": 1000000.0,
        "last_usage": {"prompt_tokens": 2000, "completion_tokens": 300},
        "context_window_tokens": 10000,
        "session_msg_count": 10,
        "context_tokens_estimate": 100000,
        "max_completion_tokens": 4096,
    }
    rendered = build_status_content(**status_kwargs)
    expected_fragment = "(999% of input budget)"
    assert expected_fragment in rendered