From 7d19c57a5de1e8537ab318633ef39cefa07ffe48 Mon Sep 17 00:00:00 2001
From: marc <m@m.m>
Date: Fri, 22 May 2026 20:41:36 +0200
Subject: [PATCH] Ingest Dashboard: optional background cache pre-warmer

Adds an asyncio background task that re-runs the heavy Ingest Dashboard
queries every ~4 min (just under the 5 min TTL) so the in-process cache
is always populated. First user hit on any dashboard widget then returns
from cache (single-digit ms) instead of waiting 30-60s for SDL.

Components:
  - backend/services/prewarmer.py: standalone module, opt-in via
    INGEST_PREWARM=1; configurable windows via INGEST_PREWARM_HOURS /
    INGEST_PREWARM_DAYS / INGEST_PREWARM_DAILY_VOLUME_DAYS and interval
    via INGEST_PREWARM_INTERVAL_SECONDS. Logs through the uvicorn logger
    so cycles are visible in 'docker logs'.
  - backend/main.py: spawn the task on FastAPI startup.
  - docker-compose.yml: forward INGEST_PREWARM* env vars to the
    backend service (default off).

Observed on a busy tenant with INGEST_PREWARM=1, default windows:
  top-sources?days=7 first hit after restart: ~39s -> ~8ms (cache warm).

Defaults to disabled (INGEST_PREWARM=0) so existing users see no
behaviour change.
---
 backend/main.py               |  8 +++
 backend/services/prewarmer.py | 97 +++++++++++++++++++++++++++++++++++
 docker-compose.yml            |  5 ++
 3 files changed, 110 insertions(+)
 create mode 100644 backend/services/prewarmer.py

diff --git a/backend/main.py b/backend/main.py
index 869f143..16e5f8a 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -45,6 +45,14 @@ with engine.connect() as _conn:
 app = FastAPI(title="SIEM Toolkit", version="1.0.0")
 
 
+@app.on_event("startup")
+async def start_ingest_prewarmer():
+    """Start the optional Ingest Dashboard cache pre-warmer (INGEST_PREWARM=1).
+    Keeps the task on app.state: asyncio itself holds only a weak reference."""
+    from services import prewarmer
+    app.state.ingest_prewarmer_task = prewarmer.start_if_enabled()
+
+
 @app.on_event("startup")
 async def auto_load_detections():
     """
diff --git a/backend/services/prewarmer.py b/backend/services/prewarmer.py
new file mode 100644
index 0000000..de8614b
--- /dev/null
+++ b/backend/services/prewarmer.py
@@ -0,0 +1,148 @@
+"""Background pre-warmer for the Ingest Dashboard cache.
+
+Opt-in via env: INGEST_PREWARM=1
+Tunable via env: INGEST_PREWARM_INTERVAL_SECONDS (default 240, just under TTL)
+                 INGEST_PREWARM_HOURS  (default "1,24,168")
+                 INGEST_PREWARM_DAYS   (default "7")
+                 INGEST_PREWARM_DAILY_VOLUME_DAYS (default "5")
+
+The pre-warmer re-runs the heavy Ingest Dashboard queries every ~4 min so the
+in-process TTL cache is always populated. First user hit then returns from
+cache (single-digit ms) instead of waiting 30-60s for SDL.
+"""
+from __future__ import annotations
+import asyncio
+import logging
+import os
+import time
+
+# Use the uvicorn logger so messages show up in `docker logs` alongside requests.
+log = logging.getLogger("uvicorn.error")
+_PREFIX = "prewarmer:"
+_DEFAULT_INTERVAL_SECONDS = 240
+# Strong reference to the loop task: asyncio itself only keeps a weak one, so
+# a task nobody else holds may be garbage-collected while it is still running.
+_task: asyncio.Task | None = None
+
+
+def _flag_enabled() -> bool:
+    """Return True when INGEST_PREWARM is 1/true/yes/on (case-insensitive)."""
+    return os.environ.get("INGEST_PREWARM", "").lower() in ("1", "true", "yes", "on")
+
+
+def _int_list(env: str, default: str) -> list[int]:
+    """Parse env var *env* (falling back to *default*) as comma-separated ints.
+
+    Tokens that are not plain ASCII digits are skipped. The isascii() guard is
+    needed because str.isdigit() also accepts characters such as superscript
+    two (U+00B2) that int() rejects with ValueError.
+    """
+    raw = os.environ.get(env, default)
+    tokens = (tok.strip() for tok in raw.split(","))
+    return [int(tok) for tok in tokens if tok.isascii() and tok.isdigit()]
+
+
+def _interval_seconds() -> int:
+    """Return the cycle interval from INGEST_PREWARM_INTERVAL_SECONDS.
+
+    Falls back to the default when the variable is unset, empty, not an
+    integer, or below 1, so a bad value can neither kill the background task
+    at start-up nor turn the loop into a busy spin.
+    """
+    raw = os.environ.get("INGEST_PREWARM_INTERVAL_SECONDS", "").strip()
+    if not raw:
+        return _DEFAULT_INTERVAL_SECONDS
+    try:
+        value = int(raw)
+    except ValueError:
+        value = 0
+    if value < 1:
+        log.warning(
+            "%s invalid INGEST_PREWARM_INTERVAL_SECONDS=%r, using default %ds",
+            _PREFIX, raw, _DEFAULT_INTERVAL_SECONDS,
+        )
+        return _DEFAULT_INTERVAL_SECONDS
+    return value
+
+
+async def _timed(coro) -> str:
+    """Await *coro* and return its status string ("OK in ..." or "ERR (...)").
+
+    The clock starts when this wrapper starts running, so each query reports
+    its own elapsed time even though all queries run concurrently.
+    """
+    t0 = time.monotonic()
+    try:
+        await coro
+    except Exception as e:
+        return f"ERR ({e.__class__.__name__}: {str(e)[:120]})"
+    return f"OK in {time.monotonic() - t0:.1f}s"
+
+
+async def _warm_once() -> dict:
+    """Run all configured warm-up queries once. Returns timing summary.
+
+    The summary maps a label such as "top-sources hours=24" to a status string.
+    Queries run concurrently; one failing query does not affect the others.
+    """
+    # Local import to avoid circular dependency with FastAPI router module.
+    from routers.ingest import (
+        _top_sources_cached,
+        _by_event_type_cached,
+        _daily_volume_cached,
+    )
+
+    hours_list = _int_list("INGEST_PREWARM_HOURS", "1,24,168")
+    days_list = _int_list("INGEST_PREWARM_DAYS", "7")
+    dv_days = _int_list("INGEST_PREWARM_DAILY_VOLUME_DAYS", "5") or [5]
+
+    # NOTE(review): nocache=True presumably forces a fresh query that then
+    # repopulates the cache; confirm against routers.ingest.
+    jobs = [(f"top-sources hours={h}", _top_sources_cached(h, nocache=True))
+            for h in hours_list]
+    jobs += [(f"by-event-type days={d}", _by_event_type_cached(d, nocache=True))
+             for d in days_list]
+    jobs += [(f"daily-volume days={d}", _daily_volume_cached(d, nocache=True))
+             for d in dv_days]
+
+    # gather() (unlike bare create_task) cancels the queries still in flight
+    # when this coroutine is cancelled, so shutdown leaves no orphaned tasks.
+    statuses = await asyncio.gather(*(_timed(coro) for _, coro in jobs))
+    return {label: status for (label, _), status in zip(jobs, statuses)}
+
+
+async def _loop():
+    """Warm the cache forever: one cycle, log the summary, sleep, repeat."""
+    interval = _interval_seconds()
+    log.info("%s starting (interval=%ds)", _PREFIX, interval)
+    # Tiny initial delay so we don't compete with startup work.
+    await asyncio.sleep(5)
+    while True:
+        try:
+            summary = await _warm_once()
+            for label, status in summary.items():
+                log.info("%s %s -> %s", _PREFIX, label, status)
+        except asyncio.CancelledError:
+            raise
+        except Exception as e:
+            # Keep the loop alive; the next cycle may succeed.
+            log.warning("%s cycle failed: %s", _PREFIX, e)
+        await asyncio.sleep(interval)
+
+
+def start_if_enabled() -> asyncio.Task | None:
+    """Spawn the pre-warm background task if INGEST_PREWARM is enabled.
+
+    Must be called from a running event loop. Returns the task handle, or
+    None if disabled. Calling it again while the task is still running
+    returns the existing task instead of starting a second loop.
+    """
+    global _task
+    if not _flag_enabled():
+        log.info("%s disabled (set INGEST_PREWARM=1 to enable)", _PREFIX)
+        return None
+    if _task is not None and not _task.done():
+        return _task
+    log.info("%s scheduling background task", _PREFIX)
+    _task = asyncio.create_task(_loop(), name="ingest-prewarmer")
+    return _task
diff --git a/docker-compose.yml b/docker-compose.yml
index 383a635..1b69237 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -21,6 +21,11 @@ services:
       - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
       - DATABASE_URL=postgresql://siem:siem@db:5432/siem
       - DETECTIONS_FILE=/app/data/detections.json
+      - INGEST_PREWARM=${INGEST_PREWARM:-0}
+      - INGEST_PREWARM_HOURS=${INGEST_PREWARM_HOURS:-1,24,168}
+      - INGEST_PREWARM_DAYS=${INGEST_PREWARM_DAYS:-7}
+      - INGEST_PREWARM_DAILY_VOLUME_DAYS=${INGEST_PREWARM_DAILY_VOLUME_DAYS:-5}
+      - INGEST_PREWARM_INTERVAL_SECONDS=${INGEST_PREWARM_INTERVAL_SECONDS:-240}
     depends_on:
       db:
         condition: service_healthy