import asyncio
import logging
from datetime import datetime, timedelta, timezone
from typing import Optional

from fastapi import APIRouter, Query, HTTPException
from pydantic import BaseModel, Field

from services import s1_client
from services.async_cache import async_ttl_cache, cache_stats, cache_clear

logger = logging.getLogger(__name__)

router = APIRouter()

# Dashboard endpoints can be expensive on busy tenants. Cache results in-process
# for a short TTL so reloads and parallel widgets are instant. Pass ?nocache=1
# to bypass for a forced refresh.
_DASHBOARD_TTL_SECONDS = 300

# Timestamp layout passed to s1_client.run_powerquery (second precision, UTC).
_TS_FORMAT = "%Y-%m-%dT%H:%M:%S.000Z"


def _utc_window(span: timedelta) -> tuple[str, str]:
    """Return ``(from, to)`` timestamps for the window of length *span* ending now (UTC)."""
    now = datetime.now(timezone.utc)
    return (now - span).strftime(_TS_FORMAT), now.strftime(_TS_FORMAT)


def _date_range(days: int) -> tuple[str, str]:
    """Return ``(from, to)`` timestamps covering the last *days* days."""
    return _utc_window(timedelta(days=days))


def _date_range_hours(hours: int) -> tuple[str, str]:
    """Return ``(from, to)`` timestamps covering the last *hours* hours."""
    return _utc_window(timedelta(hours=hours))


def _pq_string_literal(value: str) -> str:
    """Return *value* as a single-quoted literal for embedding in a query string.

    Backslashes and single quotes are escaped so that request-supplied text
    cannot terminate the literal and alter the surrounding query.
    NOTE(review): assumes the query language honours backslash escapes inside
    quoted strings -- confirm against the SDL query-language reference.
    """
    escaped = value.replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


@async_ttl_cache(ttl_seconds=_DASHBOARD_TTL_SECONDS)
async def _top_sources_cached(hours: int) -> dict:
    """Cache key: hours only. days is normalised to hours upstream."""
    from_dt, to_dt = _date_range_hours(hours)
    query = "| group events=count() by dataSource.name | sort -events | limit 25"
    result = await s1_client.run_powerquery(query, from_dt, to_dt)
    return {"data": result.get("events", [])}


@router.get("/top-sources")
async def get_top_sources(
    days: Optional[int] = Query(None, ge=1, le=90),
    hours: Optional[int] = Query(None, ge=1, le=720),
    nocache: bool = Query(False, description="Bypass dashboard cache"),
):
    """Top log sources by event count.

    Note: SDL returns 'internal Scalyr error' when this query uses day-scale
    timestamps on busy tenants, but the same window expressed in hours runs
    fine. We normalise days -> hours internally for stability.

    If both ``hours`` and ``days`` are supplied, ``hours`` takes precedence.
    With neither supplied the window defaults to 7 days.
    """
    if hours is None and days is None:
        days = 7
    if hours is None:
        hours = days * 24
        period_label = f"{days}d"
    else:
        period_label = f"{hours}h"
    try:
        cached = await _top_sources_cached(hours, nocache=nocache)
    except Exception as e:
        # Any failure of the query call is reported to the client as 502.
        raise HTTPException(502, f"PowerQuery error: {e}") from e
    return {"period": period_label, "data": cached["data"]}


@async_ttl_cache(ttl_seconds=_DASHBOARD_TTL_SECONDS)
async def _by_event_type_cached(days: int) -> dict:
    """Run the source/event-type breakdown query for the last *days* days."""
    # Same days->hours normalisation as top-sources for tenant stability.
    from_dt, to_dt = _date_range_hours(days * 24)
    query = "| group events=count() by dataSource.name, event.type | sort -events | limit 100"
    result = await s1_client.run_powerquery(query, from_dt, to_dt)
    return {"data": result.get("events", [])}


@router.get("/by-event-type")
async def get_by_event_type(
    days: int = Query(7, ge=1, le=90),
    nocache: bool = Query(False),
):
    """Event counts grouped by source and event type."""
    try:
        cached = await _by_event_type_cached(days, nocache=nocache)
    except Exception as e:
        raise HTTPException(502, f"PowerQuery error: {e}") from e
    return {"period_days": days, "data": cached["data"]}


@async_ttl_cache(ttl_seconds=_DASHBOARD_TTL_SECONDS)
async def _daily_volume_cached(days: int) -> list:
    """Return per-day event totals for up to 7 whole days before today, oldest first.

    One query is issued per day and all of them run concurrently. A day whose
    query fails is reported with ``events`` = 0 (best effort) and a warning is
    logged so the gap is not silent.
    """
    now = datetime.now(timezone.utc)
    points = min(days, 7)

    async def _fetch_day(i: int) -> dict:
        # Window is midnight-to-midnight: i=0 is the most recent complete day.
        start = now - timedelta(days=i + 1)
        end = now - timedelta(days=i)
        day_from = start.strftime("%Y-%m-%dT00:00:00.000Z")
        day_to = end.strftime("%Y-%m-%dT00:00:00.000Z")
        label = start.strftime("%Y-%m-%d")
        try:
            result = await s1_client.run_powerquery("| group total=count()", day_from, day_to)
            events_list = result.get("events", []) if isinstance(result, dict) else []
            count = events_list[0].get("total", 0) if events_list else 0
        except Exception as exc:
            # Deliberately best-effort: one bad day must not fail the whole chart.
            logger.warning("daily-volume query failed for %s: %s", label, exc)
            count = 0
        return {"date": label, "events": count}

    results = await asyncio.gather(*[_fetch_day(i) for i in range(points)])
    # Fetched newest-first; callers get chronological order.
    return list(reversed(results))


@router.get("/daily-volume")
async def get_daily_volume(
    days: int = Query(5, ge=1, le=7),
    nocache: bool = Query(False),
):
    """Total event count per day — queries run in parallel."""
    return await _daily_volume_cached(days, nocache=nocache)


@router.get("/cache-stats")
def ingest_cache_stats():
    """Inspect dashboard cache (entry count + TTL remaining per key)."""
    return cache_stats()


@router.delete("/cache")
def ingest_cache_clear():
    """Forcefully wipe the dashboard cache (next call refetches from SDL)."""
    return {"cleared": cache_clear()}


class FilterRule(BaseModel):
    """Request body for ``/simulate-filter``."""

    # Exact dataSource.name to match; empty string means "any source".
    source: str = ""
    # Exact event.type to match; empty string means "any type".
    event_type: str = ""
    # Look-back window in days. Must be >= 1 because the monthly projection
    # divides by it.
    days: int = Field(7, ge=1)
    # Conversion factor used to turn event counts into a GB estimate.
    gb_per_million_events: float = 0.5


@router.post("/simulate-filter")
async def simulate_filter(rule: FilterRule):
    """Estimate how many events and GB would be eliminated by an exclusion filter.

    Counts the events matching ``rule.source`` / ``rule.event_type`` over the
    last ``rule.days`` days, then scales that count to a 30-day projection.

    Raises:
        HTTPException: 502 if the query call fails.
    """
    from_dt, to_dt = _date_range(rule.days)

    # Values come straight from the request body, so quote them defensively.
    clauses = []
    if rule.source:
        clauses.append(f"dataSource.name=={_pq_string_literal(rule.source)}")
    if rule.event_type:
        clauses.append(f"event.type=={_pq_string_literal(rule.event_type)}")

    if clauses:
        filter_expr = " and ".join(clauses)
        query = f"| filter {filter_expr} | group events=count()"
    else:
        query = "| group events=count()"

    try:
        result = await s1_client.run_powerquery(query, from_dt, to_dt)
        rows = result.get("events")
        # An empty or missing row list means nothing matched.
        events = (rows or [{}])[0].get("events", 0) if isinstance(rows, list) else 0
    except Exception as e:
        raise HTTPException(502, f"PowerQuery error: {e}") from e

    # Guard against an explicit null count so the arithmetic below cannot fail.
    events = events or 0

    estimated_gb = round(events / 1_000_000 * rule.gb_per_million_events, 3)
    monthly_events = int(events / rule.days * 30)
    monthly_gb = round(monthly_events / 1_000_000 * rule.gb_per_million_events, 2)

    return {
        "period_days": rule.days,
        "matched_events": events,
        "estimated_gb_period": estimated_gb,
        "projected_monthly_events": monthly_events,
        "projected_monthly_gb": monthly_gb,
        "filter": {"source": rule.source, "event_type": rule.event_type},
    }