diff --git a/backend/routers/coverage.py b/backend/routers/coverage.py index 39c3e64..adb7b82 100644 --- a/backend/routers/coverage.py +++ b/backend/routers/coverage.py @@ -239,35 +239,72 @@ async def sync_sources(days: int = 7, db: Session = Depends(get_db)): return {"synced": seen, "sources": [r["dataSource.name"] for r in rows if r.get("dataSource.name")]} +def _build_parser_ds_index() -> dict[str, dict]: + """ + Read all parser files from /app/parsers/ and build an index: + dataSource.name (exact, from parser attributes) → {parser_name, format_type} + + Format type is "grok", "dottedJson", or "custom". + Sources with grok/dottedJson parsers are flagged as needing a proper parser. + """ + import os, re + parsers_dir = "/app/parsers" + _DS_NAME_RE = re.compile(r'"dataSource\.name"\s*:\s*"([^"]+)"') + _FORMAT_TYPE_RE = re.compile(r'"type"\s*:\s*"([^"]+)"') + + index: dict[str, dict] = {} + try: + entries = [e for e in os.scandir(parsers_dir) if e.is_file() and not e.name.startswith(".")] + except FileNotFoundError: + return index + + for entry in entries: + try: + with open(entry.path, "r", encoding="utf-8", errors="replace") as fh: + content = fh.read() + except Exception: + continue + + # Extract dataSource.name (may appear multiple times — take first) + ds_match = _DS_NAME_RE.search(content) + if not ds_match: + continue + ds_name = ds_match.group(1).strip() + + # Determine format type — look for grok/dottedJson/custom in "type" values + format_types = {m.group(1).lower() for m in _FORMAT_TYPE_RE.finditer(content)} + if "grok" in format_types: + fmt = "grok" + elif "dottedjson" in format_types: + fmt = "dottedJson" + else: + fmt = "custom" + + index[ds_name] = {"parser_name": entry.name, "format_type": fmt} + + return index + + @router.get("/map") def get_coverage_map(db: Session = Depends(get_db)): """ Source-centric coverage map. For each active dataSource.name in the SDL: - - covered = a parser is loaded for it - - parser_needed = no parser loaded + - covered = a custom parser is loaded for it (dataSource.name matches) + - parser_needed = no parser, OR parser uses grok/dottedJson format Also surfaces which STAR rules reference each source. """ active_sources = db.query(ActiveSource).order_by(ActiveSource.event_count.desc()).all() parser_fields_rows = db.query(ParserField).all() rules = db.query(ParsedRule).all() - # parser_name → set of field names + # parser_name → set of field names (for field count display) parser_index: dict[str, set] = {} for pf in parser_fields_rows: parser_index.setdefault(pf.parser_name, set()).add(pf.field_name) - # Build a fuzzy match: dataSource.name → parser_name - # Parser names like "paloalto", "palo", "okta_authentication-latest" need to match - # "Palo Alto Networks Firewall", "Okta", etc. - def _find_parser(source_name: str) -> str | None: - sn = source_name.lower().replace(" ", "").replace("-", "").replace("_", "") - for pname in parser_index: - pn = pname.lower().replace(" ", "").replace("-", "").replace("_", "") - # Direct substring match in either direction - if pn in sn or sn in pn: - return pname - return None + # Build exact dataSource.name → {parser_name, format_type} index from parser files + ds_index = _build_parser_ds_index() # Build rule index: source_name → rules that reference it rule_by_source: dict[str, list] = {} @@ -285,8 +322,21 @@ def get_coverage_map(db: Session = Depends(get_db)): needed_count = 0 for src in active_sources: - matched_parser = _find_parser(src.source_name) - status = "covered" if matched_parser else "parser_needed" + parser_info = ds_index.get(src.source_name) + if parser_info and parser_info["format_type"] == "custom": + status = "covered" + matched_parser = parser_info["parser_name"] + format_type = "custom" + elif parser_info: + # grok or dottedJson — flag as needing a proper parser + status = "parser_needed" + matched_parser = parser_info["parser_name"] + format_type = parser_info["format_type"] + else: + status = "parser_needed" + matched_parser = None + format_type = None + if status == "covered": covered_count += 1 else: @@ -299,6 +349,7 @@ def get_coverage_map(db: Session = Depends(get_db)): "event_count": src.event_count, "status": status, "parser": matched_parser, + "format_type": format_type, "parser_fields": len(parser_index.get(matched_parser, set())) if matched_parser else 0, "rules": rules_for_src, "rule_count": len(rules_for_src), diff --git a/backend/routers/ingest.py b/backend/routers/ingest.py index 0af85b4..5b03f9a 100644 --- a/backend/routers/ingest.py +++ b/backend/routers/ingest.py @@ -14,16 +14,32 @@ def _date_range(days: int) -> tuple[str, str]: ) +def _date_range_hours(hours: int) -> tuple[str, str]: + now = datetime.utcnow() + return ( + (now - timedelta(hours=hours)).strftime("%Y-%m-%dT%H:%M:%S.000Z"), + now.strftime("%Y-%m-%dT%H:%M:%S.000Z"), + ) + + @router.get("/top-sources") -async def get_top_sources(days: int = Query(7, ge=1, le=90)): +async def get_top_sources( + days: int = Query(None, ge=1, le=90), + hours: int = Query(None, ge=1, le=24), +): """Top log sources by event count over the given period.""" - from_dt, to_dt = _date_range(days) + if hours is not None: + from_dt, to_dt = _date_range_hours(hours) + period_label = f"{hours}h" + else: + from_dt, to_dt = _date_range(days or 7) + period_label = f"{days or 7}d" query = "| group events=count() by dataSource.name | sort -events | limit 25" try: result = await s1_client.run_powerquery(query, from_dt, to_dt) except Exception as e: raise HTTPException(502, f"PowerQuery error: {e}") - return {"period_days": days, "data": result.get("events", [])} + return {"period": period_label, "data": result.get("events", [])} @router.get("/by-event-type") diff --git a/frontend/index.html b/frontend/index.html index 8130777..90c4ea8 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -77,6 +77,13 @@ function barChart(rows, labelKey, valueKey) { const chartH = H - padT - padB const bw = Math.max(8, Math.floor(chartW / rows.length) - 4) + const defs = ` + + + + + ` + // Y-axis ticks (4 lines) const ticks = [0, 0.25, 0.5, 0.75, 1].map(t => { const val = Math.round(max * t) @@ -95,12 +102,12 @@ function barChart(rows, labelKey, valueKey) { const lbl = esc(String(r[labelKey] || '').slice(5, 10)) // value label on top of bar const valLbl = val >= 1000 ? (val/1000).toFixed(1)+'k' : val - return ` - ${valLbl} + return ` + ${valLbl} ${lbl}` }).join('') - return `${ticks}${bars}` + return `${defs}${ticks}${bars}` } // ── Home ────────────────────────────────────────────────────────────────── @@ -260,6 +267,16 @@ function cvSetFilter(f) { const sources = cvData.sources.filter(s => f === 'all' || s.status === f) + function parserCell(s) { + if (s.status === 'covered') { + return `${esc(s.parser)} (${s.parser_fields} fields)` + } + if (s.parser && s.format_type && s.format_type !== 'custom') { + return `⚠ ${esc(s.parser)} (${esc(s.format_type)} — needs custom parser)` + } + return `⚠ No parser loaded` + } + document.getElementById('cv-table').innerHTML = sources.length === 0 ? '

No sources match this filter.

' : `
@@ -275,9 +292,7 @@ function cvSetFilter(f) { - + `).join('')}
${esc(s.source_name)} ${(s.event_count||0).toLocaleString()} ${LABELS[s.status]||s.status} - ${s.status === 'parser_needed' ? '⚠ No parser loaded' : esc(s.parser) + ' (' + s.parser_fields + ' fields)'} - ${parserCell(s)} ${s.rules?.length ? s.rules.map(r=>esc(r.rule)).join(', ') : '—'}
` @@ -286,8 +301,13 @@ function cvSetFilter(f) { // ── Ingest ──────────────────────────────────────────────────────────────── let igDays = 3 +let igHours = null // null = days mode, number = hours mode function renderIngest() { + const btns = [ + {label:'1h', onclick:"igSetHours(1)", id:'ig-h1'}, + ...([3,5,7].map(d => ({label:`${d}d`, onclick:`igSetDays(${d})`, id:`ig-d${d}`}))) + ] set(`
@@ -295,8 +315,8 @@ function renderIngest() {

Event volume · cost projection · filter simulator

- ${[3,5,7].map(d=>``).join('')} + ${btns.map(b=>``).join('')}
@@ -340,15 +360,28 @@ function renderIngest() {
`) - igSetDays(igDays) + igUpdateButtons() + igLoad() +} + +function igUpdateButtons() { + const active = igHours ? `ig-h${igHours}` : `ig-d${igDays}` + ;[{id:'ig-h1'},{id:'ig-d3'},{id:'ig-d5'},{id:'ig-d7'}].forEach(({id}) => { + const b = document.getElementById(id) + if (!b) return + b.className = `px-3 py-1.5 text-xs rounded-lg border transition-colors ${id===active ? 'bg-purple-700 border-purple-600 text-white' : 'border-gray-700 text-gray-400 hover:border-gray-500'}` + }) } function igSetDays(d) { - igDays = d - ;[3,5,7].forEach(n => { - const b = document.getElementById(`ig-d${n}`) - if (b) b.className = `px-3 py-1.5 text-xs rounded-lg border transition-colors ${n===d ? 'bg-purple-700 border-purple-600 text-white' : 'border-gray-700 text-gray-400 hover:border-gray-500'}` - }) + igDays = d; igHours = null + igUpdateButtons() + igLoad() +} + +function igSetHours(h) { + igHours = h + igUpdateButtons() igLoad() } @@ -357,13 +390,19 @@ async function igLoad() { document.getElementById('ig-chart').innerHTML = spinner document.getElementById('ig-sources').innerHTML = spinner + const sourcesUrl = igHours + ? `/api/ingest/top-sources?hours=${igHours}` + : `/api/ingest/top-sources?days=${igDays}` + const [dailyResult, sourcesResult] = await Promise.allSettled([ - apiGet(`/api/ingest/daily-volume?days=${igDays}`), - apiGet(`/api/ingest/top-sources?days=${igDays}`) + igHours ? Promise.resolve(null) : apiGet(`/api/ingest/daily-volume?days=${igDays}`), + apiGet(sourcesUrl) ]) // Daily volume chart - if (dailyResult.status === 'fulfilled') { + if (igHours) { + document.getElementById('ig-chart').innerHTML = `

Daily volume chart not available for 1h view — see Top Sources below for breakdown by source.

` + } else if (dailyResult.status === 'fulfilled') { document.getElementById('ig-chart').innerHTML = barChart(dailyResult.value, 'date', 'events') } else { document.getElementById('ig-chart').innerHTML = `

${esc(dailyResult.reason?.message ?? 'Error')}

`