From f0bd56aee894b18ad69d1878c80c52075d6144bb Mon Sep 17 00:00:00 2001
From: Mick <119439091+mickbrowns1@users.noreply.github.com>
Date: Tue, 19 May 2026 12:31:48 -0400
Subject: [PATCH] Rewrite coverage map as source-centric view
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously showed field-level coverage (rule fields vs parser fields).
Now shows per-dataSource.name coverage: is a parser loaded for each
active ingest source?

- New ActiveSource DB model stores live sources from SDL
- New POST /api/coverage/sync-sources endpoint runs PowerQuery to fetch
  current dataSource.names and their event counts, stores in DB
- GET /api/coverage/map now returns per-source status:
    covered = a loaded parser matches this source name
    parser_needed = source is ingesting but no parser is loaded
- Parser matching uses fuzzy substring (handles "palo"→"Palo Alto Networks Firewall")
- Coverage table shows: source name, 7d event count, status,
  matched parser + field count, STAR rules
- Frontend: new "Sync Live Sources" button, updated stats cards,
  updated filter tabs
- Removed field-level view (was confusing — parser_needed on a field ≠
  missing parser for a source)

Co-Authored-By: Claude Sonnet 4.6
---
 backend/db.py               |   8 ++
 backend/routers/coverage.py | 142 +++++++++++++++++++++++++-----------
 frontend/index.html         |  97 ++++++++++++++----------
 3 files changed, 164 insertions(+), 83 deletions(-)

diff --git a/backend/db.py b/backend/db.py
index 0b4a9d9..15708aa 100644
--- a/backend/db.py
+++ b/backend/db.py
@@ -30,6 +30,14 @@ class ParserField(Base):
     field_type = Column(String)
 
 
+class ActiveSource(Base):
+    __tablename__ = "active_sources"
+    id = Column(Integer, primary_key=True)
+    source_name = Column(String, unique=True, index=True)
+    event_count = Column(Integer, default=0)
+    synced_at = Column(DateTime, default=datetime.utcnow)
+
+
 class IngestSnapshot(Base):
     __tablename__ = "ingest_snapshots"
     id = Column(Integer, primary_key=True)
diff 
--git a/backend/routers/coverage.py b/backend/routers/coverage.py
index 37fabe5..39c3e64 100644
--- a/backend/routers/coverage.py
+++ b/backend/routers/coverage.py
@@ -2,7 +2,8 @@ import json
 from fastapi import APIRouter, UploadFile, File, Depends, HTTPException
 from pydantic import BaseModel
 from sqlalchemy.orm import Session
-from db import get_db, ParsedRule, ParserField
+from datetime import datetime
+from db import get_db, ParsedRule, ParserField, ActiveSource
 from services import s1_client, rule_parser
 
 router = APIRouter()
@@ -205,63 +206,118 @@ async def load_parser_content(payload: ParserContentPayload, db: Session = Depen
     return {"parser": payload.parser_name, "fields": list(fields), "field_count": len(fields)}
 
 
+@router.post("/sync-sources")
+async def sync_sources(days: int = 7, db: Session = Depends(get_db)):
+    """Pull active dataSource.names from the SDL and store them."""
+    from datetime import datetime, timedelta
+    now = datetime.utcnow()
+    from_dt = (now - timedelta(days=days)).strftime("%Y-%m-%dT%H:%M:%S.000Z")
+    to_dt = now.strftime("%Y-%m-%dT%H:%M:%S.000Z")
+    try:
+        result = await s1_client.run_powerquery(
+            "| group events=count() by dataSource.name | sort -events | limit 200",
+            from_dt, to_dt
+        )
+    except Exception as e:
+        raise HTTPException(502, f"PowerQuery error: {e}")
+
+    rows = result.get("events", [])
+    # Clear old and insert fresh
+    db.query(ActiveSource).delete()
+    synced_at = datetime.utcnow()
+    seen = 0
+    for row in rows:
+        name = row.get("dataSource.name")
+        if name:
+            db.add(ActiveSource(
+                source_name=name,
+                event_count=row.get("events", 0),
+                synced_at=synced_at,
+            ))
+            seen += 1
+    db.commit()
+    return {"synced": seen, "sources": [r["dataSource.name"] for r in rows if r.get("dataSource.name")]}
+
+
 @router.get("/map")
 def get_coverage_map(db: Session = Depends(get_db)):
-    """Return coverage analysis: parser fields vs rule fields."""
-    rules = db.query(ParsedRule).all()
+    """
+    Source-centric coverage map.
+    For each active dataSource.name in the SDL:
+      - covered = a parser is loaded for it
+      - parser_needed = no parser loaded
+    Also surfaces which STAR rules reference each source.
+    """
+    active_sources = db.query(ActiveSource).order_by(ActiveSource.event_count.desc()).all()
     parser_fields_rows = db.query(ParserField).all()
+    rules = db.query(ParsedRule).all()
 
-    # field → list of rules using it + data sources referenced by those rules
-    rule_field_index: dict[str, list] = {}
-    rule_ds_index: dict[str, set] = {}  # field → set of dataSource.name values
+    # parser_name → set of field names
+    parser_index: dict[str, set] = {}
+    for pf in parser_fields_rows:
+        parser_index.setdefault(pf.parser_name, set()).add(pf.field_name)
+
+    # Build a fuzzy match: dataSource.name → parser_name
+    # Parser names like "paloalto", "palo", "okta_authentication-latest" need to match
+    # "Palo Alto Networks Firewall", "Okta", etc.
+    def _find_parser(source_name: str) -> str | None:
+        sn = source_name.lower().replace(" ", "").replace("-", "").replace("_", "")
+        for pname in parser_index:
+            pn = pname.lower().replace(" ", "").replace("-", "").replace("_", "")
+            # Direct substring match in either direction
+            if pn in sn or sn in pn:
+                return pname
+        return None
+
+    # Build rule index: source_name → rules that reference it
+    rule_by_source: dict[str, list] = {}
     for rule in rules:
         query_texts = _star_query_texts(json.loads(rule.raw)) if rule.rule_type == "star" else []
         data_sources = rule_parser.extract_data_sources(query_texts)
-        for field in rule.fields_used or []:
-            rule_field_index.setdefault(field, []).append(
-                {"rule": rule.name, "type": rule.rule_type}
-            )
-            rule_ds_index.setdefault(field, set()).update(data_sources)
+        for ds in data_sources:
+            rule_by_source.setdefault(ds, []).append({"rule": rule.name, "type": rule.rule_type})
+        if not data_sources:
+            # Rule with no explicit source filter — applies to all
+            rule_by_source.setdefault("__any__", []).append({"rule": rule.name, "type": rule.rule_type})
 
-    # field → parser name
-    parser_field_index: dict[str, str] = {
-        pf.field_name: pf.parser_name for pf in parser_fields_rows
-    }
+    sources_out = []
+    covered_count = 0
+    needed_count = 0
 
-    all_fields = set(rule_field_index) | set(parser_field_index)
+    for src in active_sources:
+        matched_parser = _find_parser(src.source_name)
+        status = "covered" if matched_parser else "parser_needed"
+        if status == "covered":
+            covered_count += 1
+        else:
+            needed_count += 1
 
-    detail = {}
-    for f in all_fields:
-        in_parser = f in parser_field_index
-        in_rules = f in rule_field_index
-        detail[f] = {
-            "in_parser": in_parser,
-            "parser_name": parser_field_index.get(f),
-            "data_sources": sorted(rule_ds_index.get(f, set())),
-            "rule_count": len(rule_field_index.get(f, [])),
-            "rules": rule_field_index.get(f, []),
-            "status": (
-                "covered" if in_parser and in_rules
-                else "unused" if in_parser and not in_rules
-                else "missing_parser"
-            ),
-        }
+        rules_for_src = rule_by_source.get(src.source_name, []) + rule_by_source.get("__any__", [])
 
-    parsed_unused = [f for f, d in detail.items() if d["status"] == "unused"]
-    missing_parser = [f for f, d in detail.items() if d["status"] == "missing_parser"]
-    covered = [f for f, d in detail.items() if d["status"] == "covered"]
+        sources_out.append({
+            "source_name": src.source_name,
+            "event_count": src.event_count,
+            "status": status,
+            "parser": matched_parser,
+            "parser_fields": len(parser_index.get(matched_parser, set())) if matched_parser else 0,
+            "rules": rules_for_src,
+            "rule_count": len(rules_for_src),
+            "synced_at": src.synced_at.isoformat() if src.synced_at else None,
+        })
+
+    synced_at = active_sources[0].synced_at.isoformat() if active_sources else None
 
     return {
         "summary": {
-            "total_parser_fields": len(parser_field_index),
-            "total_rule_fields": len(rule_field_index),
-            "covered": len(covered),
-            "parsed_but_unused": len(parsed_unused),
-            "rules_missing_parser": len(missing_parser),
+            "active_sources": len(active_sources),
+            "covered": covered_count,
+            "parser_needed": needed_count,
+            "parsers_loaded": len(parser_index),
+            "rules_loaded": len(rules),
         },
-        "parsed_but_unused": parsed_unused,
-        "rules_missing_parser": missing_parser,
-        "fields": detail,
+        "sources": sources_out,
+        "synced_at": synced_at,
+        "has_sources": len(active_sources) > 0,
     }
 
 
diff --git a/frontend/index.html b/frontend/index.html
index e7dd161..8130777 100644
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -137,17 +137,16 @@ function renderCoverage() {

Parser Coverage Map

-

Cross-reference SDL parser fields against STAR / Sigma rule fields

+

For each active data source — is a parser loaded?

+ -
-
@@ -198,40 +197,54 @@ async function cvReset() { await apiGet('/api/coverage/reset'); cvData = null; cvLoad() } +async function cvSyncSources() { + setBtn('btn-sync', true) + document.getElementById('cv-err').innerHTML = '' + try { + const r = await apiPost('/api/coverage/sync-sources?days=7', {}) + await cvLoad() + } catch(e) { + document.getElementById('cv-err').innerHTML = errBox(e.message) + } finally { setBtn('btn-sync', false, 'Sync Live Sources') } +} + async function cvLoad() { try { cvData = await apiGet('/api/coverage/map') const s = cvData.summary + document.getElementById('cv-stats').innerHTML = ` -
- ${statCard('Parser Fields', s.total_parser_fields)} - ${statCard('Rule Fields', s.total_rule_fields)} +
+ ${statCard('Active Sources', s.active_sources)} ${statCard('Covered', s.covered, 'text-emerald-400')} - ${statCard('Parsed Unused', s.parsed_but_unused, 'text-yellow-400')} - ${statCard('Missing Parser', s.rules_missing_parser, 'text-red-400')} + ${statCard('Parser Needed', s.parser_needed, 'text-red-400')} + ${statCard('Parsers Loaded', s.parsers_loaded, 'text-purple-400')}
` + + if (!cvData.has_sources) { + document.getElementById('cv-filters').classList.add('hidden') + document.getElementById('cv-table').innerHTML = ` +
+

No active sources synced yet.

+

Click Sync Live Sources to pull current dataSource.names from the data lake, then Load STAR Rules and Load SDL Parsers to see coverage.

+
` + return + } + const filtersEl = document.getElementById('cv-filters') filtersEl.classList.remove('hidden') - filtersEl.innerHTML = [['all','All'],['covered','Covered'],['unused','Parsed Unused'],['missing_parser','Missing Parser']] + filtersEl.innerHTML = [['all','All'],['covered','Covered'],['parser_needed','Parser Needed']] .map(([f,l]) => ``).join('') - cvSetFilter(cvFilter) - } catch { - document.getElementById('cv-table').innerHTML = '

Load STAR rules or upload parsers to begin.

' - } -} -function suggestParser(field, dataSources) { - if (dataSources && dataSources.length) { - return 'Parser needed for: ' + dataSources.join(', ') + if (cvData.synced_at) { + filtersEl.innerHTML += `Synced ${new Date(cvData.synced_at).toLocaleTimeString()}` + } + + cvSetFilter(cvFilter) + } catch(e) { + document.getElementById('cv-table').innerHTML = '

Failed to load coverage data.

' } - // Fallback if no dataSource.name found in rule queries - const f = field.toLowerCase() - if (f.startsWith('wineventlog')) return 'Windows Event Log (WEL) parser' - if (f.startsWith('event.')) return 'Event normalisation parser' - if (f.includes('dns')) return 'DNS log parser' - if (f.includes('process')) return 'Endpoint process parser' - return 'Custom parser needed' } function cvSetFilter(f) { @@ -240,28 +253,32 @@ function cvSetFilter(f) { const on = b.id === `cvf-${f}` b.className = `px-3 py-1 text-xs rounded-full border transition-colors ${on ? 'bg-purple-700 border-purple-600 text-white' : 'border-gray-700 text-gray-400 hover:border-gray-500'}` }) - if (!cvData) return - const LABELS = { covered:'Covered', unused:'Parsed Unused', missing_parser:'Missing Parser' } - const STYLES = { covered:'bg-emerald-900/50 text-emerald-300 border-emerald-700', unused:'bg-yellow-900/50 text-yellow-300 border-yellow-700', missing_parser:'bg-red-900/50 text-red-300 border-red-700' } - const fields = Object.entries(cvData.fields).filter(([,d]) => f === 'all' || d.status === f) - const showSuggest = f === 'missing_parser' || f === 'all' - document.getElementById('cv-table').innerHTML = fields.length === 0 - ? '

No fields match this filter.

' + if (!cvData?.sources) return + + const LABELS = { covered: 'Covered', parser_needed: 'Parser Needed' } + const STYLES = { covered: 'bg-emerald-900/50 text-emerald-300 border-emerald-700', parser_needed: 'bg-red-900/50 text-red-300 border-red-700' } + + const sources = cvData.sources.filter(s => f === 'all' || s.status === f) + + document.getElementById('cv-table').innerHTML = sources.length === 0 + ? '

No sources match this filter.

' : `
- + + - - + + - ${fields.map(([field, d]) => ` + ${sources.map(s => ` - - - + + + - + `).join('')}
FieldData SourceEvents (7d) StatusParser / SuggestionBlocked rulesParserSTAR Rules
${esc(field)}${LABELS[d.status]||d.status} - ${d.status === 'missing_parser' ? '⚠ ' + esc(suggestParser(field, d.data_sources)) : esc(d.parser_name || '—')} + ${esc(s.source_name)}${(s.event_count||0).toLocaleString()}${LABELS[s.status]||s.status} + ${s.status === 'parser_needed' ? '⚠ No parser loaded' : esc(s.parser) + ' (' + s.parser_fields + ' fields)'} ${d.rules?.length ? d.rules.map(r=>esc(r.rule)).join(', ') : '—'}${s.rules?.length ? s.rules.map(r=>esc(r.rule)).join(', ') : '—'}
` }