From 7d19c57a5de1e8537ab318633ef39cefa07ffe48 Mon Sep 17 00:00:00 2001
From: marc
Date: Fri, 22 May 2026 20:41:36 +0200
Subject: [PATCH] Ingest Dashboard: optional background cache pre-warmer

Adds an asyncio background task that re-runs the heavy Ingest Dashboard
queries every ~4 min (just under the 5 min TTL) so the in-process cache
is always populated. First user hit on any dashboard widget then returns
from cache (single-digit ms) instead of waiting 30-60s for SDL.

Components:
- backend/services/prewarmer.py: standalone module, opt-in via
  INGEST_PREWARM=1; configurable windows via INGEST_PREWARM_HOURS /
  INGEST_PREWARM_DAYS / INGEST_PREWARM_DAILY_VOLUME_DAYS and interval via
  INGEST_PREWARM_INTERVAL_SECONDS (invalid values fall back to 240).
  Cycles are scheduled start to start so the refresh period stays under
  the TTL even when the queries themselves take 30-60s. The module keeps
  a reference to its task so it cannot be garbage-collected while
  running. Logs through the uvicorn logger so cycles are visible in
  'docker logs'.
- backend/main.py: spawn the task on FastAPI startup.
- docker-compose.yml: forward INGEST_PREWARM* env vars to the backend
  service (default off).

Observed on a busy tenant with INGEST_PREWARM=1, default windows:
top-sources?days=7 first hit after restart: ~39s -> ~8ms (cache warm).

Defaults to disabled (INGEST_PREWARM=0) so existing users see no
behaviour change.
---
Review notes (not part of the commit message):
- Line structure and indentation were reconstructed from a
  whitespace-collapsed copy of this patch; check the context lines
  against the tree before applying.
- prewarmer.py was revised in review, so the blob id on its "index"
  line is stale.

 backend/main.py               |   8 ++
 backend/services/prewarmer.py | 140 +++++++++++++++++++++++++++++++++++
 docker-compose.yml            |   5 ++
 3 files changed, 153 insertions(+)
 create mode 100644 backend/services/prewarmer.py

diff --git a/backend/main.py b/backend/main.py
index 869f143..16e5f8a 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -45,6 +45,14 @@ with engine.connect() as _conn:
 
 app = FastAPI(title="SIEM Toolkit", version="1.0.0")
 
+@app.on_event("startup")
+async def start_ingest_prewarmer():
+    """Start optional background pre-warmer for the Ingest Dashboard cache.
+    Opt-in via INGEST_PREWARM=1. See backend/services/prewarmer.py."""
+    from services import prewarmer
+    prewarmer.start_if_enabled()
+
+
 @app.on_event("startup")
 async def auto_load_detections():
     """
diff --git a/backend/services/prewarmer.py b/backend/services/prewarmer.py
new file mode 100644
index 0000000..de8614b
--- /dev/null
+++ b/backend/services/prewarmer.py
@@ -0,0 +1,140 @@
+"""Background pre-warmer for the Ingest Dashboard cache.
+
+Opt-in via env:  INGEST_PREWARM=1
+Tunable via env: INGEST_PREWARM_INTERVAL_SECONDS (default 240, just under TTL)
+                 INGEST_PREWARM_HOURS (default "1,24,168")
+                 INGEST_PREWARM_DAYS (default "7")
+                 INGEST_PREWARM_DAILY_VOLUME_DAYS (default "5")
+
+The pre-warmer re-runs the heavy Ingest Dashboard queries every ~4 min so the
+in-process TTL cache is always populated. First user hit then returns from
+cache (single-digit ms) instead of waiting 30-60s for SDL.
+"""
+from __future__ import annotations
+import asyncio
+import logging
+import os
+import time
+
+# Use the uvicorn logger so messages show up in `docker logs` alongside requests.
+log = logging.getLogger("uvicorn.error")
+_PREFIX = "prewarmer:"
+_DEFAULT_INTERVAL = 240
+# Strong reference to the running task: the event loop only keeps weak
+# references to tasks, so an unreferenced one may be garbage-collected.
+_task: asyncio.Task | None = None
+
+
+def _flag_enabled() -> bool:
+    """Return True when INGEST_PREWARM is set to 1/true/yes/on (any case)."""
+    return os.environ.get("INGEST_PREWARM", "").lower() in ("1", "true", "yes", "on")
+
+
+def _int_list(env: str, default: str) -> list[int]:
+    """Parse a comma-separated env var into ints, skipping non-numeric tokens."""
+    raw = os.environ.get(env, default)
+    out = []
+    for tok in raw.split(","):
+        tok = tok.strip()
+        # isdecimal(), not isdigit(): the latter accepts e.g. superscript
+        # digits, which int() then rejects with ValueError.
+        if tok and tok.isdecimal():
+            out.append(int(tok))
+    return out
+
+
+def _interval_seconds() -> int:
+    """Return the cycle interval, using the default if unset or invalid."""
+    raw = os.environ.get("INGEST_PREWARM_INTERVAL_SECONDS", "").strip()
+    if not raw:
+        return _DEFAULT_INTERVAL
+    try:
+        value = int(raw)
+    except ValueError:
+        value = 0
+    if value < 1:
+        log.warning("%s invalid INGEST_PREWARM_INTERVAL_SECONDS=%r, using %ds",
+                    _PREFIX, raw, _DEFAULT_INTERVAL)
+        return _DEFAULT_INTERVAL
+    return value
+
+
+async def _timed(label: str, query, window: int) -> tuple[str, str]:
+    """Run one warm-up query and return (label, status) with its own timing."""
+    t0 = time.monotonic()
+    try:
+        await query(window, nocache=True)
+    except Exception as e:
+        return label, f"ERR ({e.__class__.__name__}: {str(e)[:120]})"
+    return label, f"OK in {time.monotonic() - t0:.1f}s"
+
+
+async def _warm_once() -> dict:
+    """Run all configured warm-up queries concurrently, once.
+
+    Returns a dict mapping each query label to "OK in <n>s" or "ERR (...)".
+    A failing query is reported in the summary; it does not abort the others.
+    """
+    # Local import to avoid circular dependency with FastAPI router module.
+    from routers.ingest import (
+        _top_sources_cached,
+        _by_event_type_cached,
+        _daily_volume_cached,
+    )
+
+    hours_list = _int_list("INGEST_PREWARM_HOURS", "1,24,168")
+    days_list = _int_list("INGEST_PREWARM_DAYS", "7")
+    dv_days = _int_list("INGEST_PREWARM_DAILY_VOLUME_DAYS", "5") or [5]
+
+    jobs = [_timed(f"top-sources hours={h}", _top_sources_cached, h)
+            for h in hours_list]
+    jobs += [_timed(f"by-event-type days={d}", _by_event_type_cached, d)
+             for d in days_list]
+    jobs += [_timed(f"daily-volume days={d}", _daily_volume_cached, d)
+             for d in dv_days]
+
+    # Each job times itself, so a query that finishes while an earlier one is
+    # still running is not reported as "OK in 0.0s". gather() keeps input
+    # order and cancels the outstanding queries if this coroutine is cancelled.
+    return dict(await asyncio.gather(*jobs))
+
+
+async def _loop():
+    """Run warm-up cycles forever, one per interval measured start to start."""
+    interval = _interval_seconds()
+    log.info("%s starting (interval=%ds)", _PREFIX, interval)
+    # Tiny initial delay so we don't compete with startup work.
+    await asyncio.sleep(5)
+    while True:
+        started = time.monotonic()
+        try:
+            summary = await _warm_once()
+            for label, status in summary.items():
+                log.info("%s %s -> %s", _PREFIX, label, status)
+        except asyncio.CancelledError:
+            raise
+        except Exception as e:
+            log.warning("%s cycle failed: %s", _PREFIX, e)
+        # Sleep only for the rest of the interval. Sleeping the full interval
+        # after a cycle that can itself take 30-60s would stretch the refresh
+        # period to about the cache TTL and let entries expire.
+        elapsed = time.monotonic() - started
+        await asyncio.sleep(max(1.0, interval - elapsed))
+
+
+def start_if_enabled() -> asyncio.Task | None:
+    """Spawn the pre-warm background task if INGEST_PREWARM is enabled.
+
+    Must be called while an event loop is running. If a pre-warm task is
+    already running, that task is returned instead of starting a second one.
+    Returns the task handle, or None if disabled.
+    """
+    global _task
+    if not _flag_enabled():
+        log.info("%s disabled (set INGEST_PREWARM=1 to enable)", _PREFIX)
+        return None
+    if _task is not None and not _task.done():
+        return _task
+    log.info("%s scheduling background task", _PREFIX)
+    _task = asyncio.create_task(_loop(), name="ingest-prewarmer")
+    return _task
diff --git a/docker-compose.yml b/docker-compose.yml
index 383a635..1b69237 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -21,6 +21,11 @@ services:
       - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
       - DATABASE_URL=postgresql://siem:siem@db:5432/siem
       - DETECTIONS_FILE=/app/data/detections.json
+      - INGEST_PREWARM=${INGEST_PREWARM:-0}
+      - INGEST_PREWARM_HOURS=${INGEST_PREWARM_HOURS:-1,24,168}
+      - INGEST_PREWARM_DAYS=${INGEST_PREWARM_DAYS:-7}
+      - INGEST_PREWARM_DAILY_VOLUME_DAYS=${INGEST_PREWARM_DAILY_VOLUME_DAYS:-5}
+      - INGEST_PREWARM_INTERVAL_SECONDS=${INGEST_PREWARM_INTERVAL_SECONDS:-240}
     depends_on:
       db:
         condition: service_healthy