From b52bfddf16636f85d5805fce393721d49eb5223c Mon Sep 17 00:00:00 2001 From: Daniel Phang Date: Sat, 11 Apr 2026 00:34:48 -0700 Subject: [PATCH] fix(cron): guard _load_store against reentrant reload during job execution When on_job callbacks call list_jobs() (which triggers _load_store), the in-memory state is reloaded from disk, discarding the next_run_at_ms updates that _on_timer is actively computing. This causes jobs to re-trigger indefinitely on the next tick. Add an _executing flag around the job execution loop. While set, _load_store returns the cached store instead of reloading from disk. Includes regression test. Co-Authored-By: Claude Opus 4.6 (1M context) --- nanobot/cron/service.py | 13 ++++++++-- tests/cron/test_cron_service.py | 46 +++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 2 deletions(-) diff --git a/nanobot/cron/service.py b/nanobot/cron/service.py index 267613012..d3f74fbf8 100644 --- a/nanobot/cron/service.py +++ b/nanobot/cron/service.py @@ -80,6 +80,7 @@ class CronService: self._store: CronStore | None = None self._timer_task: asyncio.Task | None = None self._running = False + self._executing = False self.max_sleep_ms = max_sleep_ms def _load_jobs(self) -> tuple[list[CronJob], int]: @@ -171,7 +172,11 @@ class CronService: def _load_store(self) -> CronStore: """Load jobs from disk. Reloads automatically if file was modified externally. - Reload every time because it needs to merge operations on the jobs object from other instances. + - Skip reload when _executing to prevent on_job callbacks (e.g. list_jobs) + from replacing in-memory state that _on_timer is actively modifying. """ + if self._executing and self._store is not None: + return self._store jobs, version = self._load_jobs() self._store = CronStore(version=version, jobs=jobs) self._merge_action() @@ -298,8 +303,12 @@ class CronService: if j.enabled and j.state.next_run_at_ms and now >= j.state.next_run_at_ms ] - for job in due_jobs: - await self._execute_job(job) + self._executing = True + try: + for job in due_jobs: + await self._execute_job(job) + finally: + self._executing = False self._save_store() self._arm_timer() diff --git a/tests/cron/test_cron_service.py b/tests/cron/test_cron_service.py index f1956d8d2..4aa7fc06d 100644 --- a/tests/cron/test_cron_service.py +++ b/tests/cron/test_cron_service.py @@ -479,3 +479,49 @@ def test_update_job_sentinel_channel_and_to(tmp_path) -> None: assert isinstance(result, CronJob) assert result.payload.channel is None assert result.payload.to is None + + +@pytest.mark.asyncio +async def test_list_jobs_during_on_job_does_not_cause_stale_reload(tmp_path) -> None: + """Regression: if the bot calls list_jobs (which reloads from disk) during + on_job execution, the in-memory next_run_at_ms update must not be lost. + Previously this caused an infinite re-trigger loop.""" + store_path = tmp_path / "cron" / "jobs.json" + execution_count = 0 + + async def on_job_that_lists(job): + nonlocal execution_count + execution_count += 1 + # Simulate the bot calling cron(action=list) mid-execution + service.list_jobs() + + service = CronService(store_path, on_job=on_job_that_lists, max_sleep_ms=100) + await service.start() + + # Add two jobs scheduled in the past so they're immediately due + now_ms = int(time.time() * 1000) + for name in ("job-a", "job-b"): + service.add_job( + name=name, + schedule=CronSchedule(kind="every", every_ms=3_600_000), + message="test", + ) + # Force next_run to the past so _on_timer picks them up + for job in service._store.jobs: + job.state.next_run_at_ms = now_ms - 1000 + service._save_store() + service._arm_timer() + + # Let the timer fire once + await asyncio.sleep(0.3) + service.stop() + + # Each job should have run exactly once, not looped + assert execution_count == 2 + + # Verify next_run_at_ms was persisted correctly (in the future) + raw = json.loads(store_path.read_text()) + for j in raw["jobs"]: + next_run = j["state"]["nextRunAtMs"] + assert next_run is not None + assert next_run > now_ms, f"Job '{j['name']}' next_run should be in the future"