mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-05 01:05:51 +00:00
fix(status): correct context percentage calculation and sync consolidator
- Pass the resolved self.context_window_tokens to Consolidator instead of the raw parameter, which could be None, preventing consolidation failures.
- Calculate the percentage against the input budget (ctx - max_completion - 1024) instead of the raw context window, consistent with the Consolidator/snip formulas.
- Pass the actual max_completion_tokens from the provider to build_status_content.
- Cap the percentage display at 999 to prevent runaway values.
- Add tests for the budget-based percentage and the cap behavior.
This commit is contained in:
parent
92a5125108
commit
e1fdca7d40
@ -226,7 +226,7 @@ class AgentLoop:
|
||||
provider=provider,
|
||||
model=self.model,
|
||||
sessions=self.sessions,
|
||||
context_window_tokens=context_window_tokens,
|
||||
context_window_tokens=self.context_window_tokens,
|
||||
build_messages=self.context.build_messages,
|
||||
get_tool_definitions=self.tools.get_definitions,
|
||||
max_completion_tokens=provider.generation.max_tokens,
|
||||
|
||||
@ -91,6 +91,9 @@ async def cmd_status(ctx: CommandContext) -> OutboundMessage:
|
||||
context_tokens_estimate=ctx_est,
|
||||
search_usage_text=search_usage_text,
|
||||
active_task_count=task_count,
|
||||
max_completion_tokens=getattr(
|
||||
getattr(loop.provider, "generation", None), "max_tokens", 8192
|
||||
),
|
||||
),
|
||||
metadata={**dict(ctx.msg.metadata or {}), "render_as": "text"},
|
||||
)
|
||||
|
||||
@ -401,6 +401,7 @@ def build_status_content(
|
||||
context_tokens_estimate: int,
|
||||
search_usage_text: str | None = None,
|
||||
active_task_count: int = 0,
|
||||
max_completion_tokens: int = 8192,
|
||||
) -> str:
|
||||
"""Build a human-readable runtime status snapshot.
|
||||
|
||||
@ -419,7 +420,9 @@ def build_status_content(
|
||||
last_out = last_usage.get("completion_tokens", 0)
|
||||
cached = last_usage.get("cached_tokens", 0)
|
||||
ctx_total = max(context_window_tokens, 0)
|
||||
ctx_pct = int((context_tokens_estimate / ctx_total) * 100) if ctx_total > 0 else 0
|
||||
# Budget mirrors Consolidator formula: ctx_window - max_completion - _SAFETY_BUFFER
|
||||
ctx_budget = max(ctx_total - int(max_completion_tokens) - 1024, 1)
|
||||
ctx_pct = min(int((context_tokens_estimate / ctx_budget) * 100), 999) if ctx_budget > 0 else 0
|
||||
ctx_used_str = f"{context_tokens_estimate // 1000}k" if context_tokens_estimate >= 1000 else str(context_tokens_estimate)
|
||||
ctx_total_str = f"{ctx_total // 1000}k" if ctx_total > 0 else "n/a"
|
||||
token_line = f"\U0001f4ca Tokens: {last_in} in / {last_out} out"
|
||||
@ -429,7 +432,7 @@ def build_status_content(
|
||||
f"\U0001f408 nanobot v{version}",
|
||||
f"\U0001f9e0 Model: {model}",
|
||||
token_line,
|
||||
f"\U0001f4da Context: {ctx_used_str}/{ctx_total_str} ({ctx_pct}%)",
|
||||
f"\U0001f4da Context: {ctx_used_str}/{ctx_total_str} ({ctx_pct}% of input budget)",
|
||||
f"\U0001f4ac Session: {session_msg_count} messages",
|
||||
f"\u23f1 Uptime: {uptime}",
|
||||
f"\u26a1 Tasks: {active_task_count} active",
|
||||
|
||||
@ -149,7 +149,7 @@ class TestRestartCommand:
|
||||
assert response is not None
|
||||
assert "Model: test-model" in response.content
|
||||
assert "Tokens: 0 in / 0 out" in response.content
|
||||
assert "Context: 20k/65k (31%)" in response.content
|
||||
assert "Context: 20k/65k (31% of input budget)" in response.content
|
||||
assert "Session: 3 messages" in response.content
|
||||
assert "Uptime: 2m 5s" in response.content
|
||||
assert "Tasks: 0 active" in response.content
|
||||
@ -213,7 +213,7 @@ class TestRestartCommand:
|
||||
|
||||
assert response is not None
|
||||
assert "Tokens: 1200 in / 34 out" in response.content
|
||||
assert "Context: 1k/65k (1%)" in response.content
|
||||
assert "Context: 1k/65k (1% of input budget)" in response.content
|
||||
assert "Tasks: 0 active" in response.content
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
@ -59,3 +59,34 @@ def test_status_100_percent_cached():
|
||||
context_tokens_estimate=3000,
|
||||
)
|
||||
assert "100% cached" in content
|
||||
|
||||
|
||||
def test_status_context_pct_uses_budget_not_total():
    """The context percentage is relative to the input budget, not the full window."""
    # Input budget = 128000 - 8192 - 1024 = 118784 tokens, so an estimate of
    # 120000 tokens is 120000 / 118784 ≈ 1.0102, which truncates to 101%.
    status_kwargs = dict(
        version="0.1.0",
        model="test",
        start_time=1000000.0,
        last_usage={"prompt_tokens": 2000, "completion_tokens": 300},
        context_window_tokens=128000,
        session_msg_count=10,
        context_tokens_estimate=120000,
        max_completion_tokens=8192,
    )
    rendered = build_status_content(**status_kwargs)
    assert "(101% of input budget)" in rendered
|
||||
|
||||
|
||||
def test_status_context_pct_capped_at_999():
    """A wildly over-budget estimate is displayed as 999%, never more."""
    # Input budget = 10000 - 4096 - 1024 = 4880 tokens; 100000 / 4880 would be
    # roughly 2049% uncapped, so the rendered value must be clamped to 999.
    status_kwargs = dict(
        version="0.1.0",
        model="test",
        start_time=1000000.0,
        last_usage={"prompt_tokens": 2000, "completion_tokens": 300},
        context_window_tokens=10000,
        session_msg_count=10,
        context_tokens_estimate=100000,
        max_completion_tokens=4096,
    )
    rendered = build_status_content(**status_kwargs)
    assert "(999% of input budget)" in rendered
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user