Rewrite coverage map as source-centric view

Previously showed field-level coverage (rule fields vs parser fields).
Now shows per-dataSource.name coverage: is a parser loaded for each
active ingest source?

- New ActiveSource DB model stores live sources from SDL
- New POST /api/coverage/sync-sources endpoint runs PowerQuery to fetch
  current dataSource.names and their event counts, stores in DB
- GET /api/coverage/map now returns per-source status:
    covered       = a loaded parser matches this source name
    parser_needed = source is ingesting but no parser is loaded
- Parser matching uses fuzzy substring (handles "palo"→"Palo Alto Networks Firewall")
- Coverage table shows: source name, 7d event count, status, matched parser + field count, STAR rules
- Frontend: new "Sync Live Sources" button, updated stats cards, updated filter tabs
- Removed field-level view (was confusing — parser_needed on a field ≠ missing parser for a source)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mick
2026-05-19 12:31:48 -04:00
parent 2262892859
commit f0bd56aee8
3 changed files with 164 additions and 83 deletions
+8
View File
@@ -30,6 +30,14 @@ class ParserField(Base):
field_type = Column(String)
class ActiveSource(Base):
__tablename__ = "active_sources"
id = Column(Integer, primary_key=True)
source_name = Column(String, unique=True, index=True)
event_count = Column(Integer, default=0)
synced_at = Column(DateTime, default=datetime.utcnow)
class IngestSnapshot(Base):
__tablename__ = "ingest_snapshots"
id = Column(Integer, primary_key=True)
+99 -43
View File
@@ -2,7 +2,8 @@ import json
from fastapi import APIRouter, UploadFile, File, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy.orm import Session
from db import get_db, ParsedRule, ParserField
from datetime import datetime
from db import get_db, ParsedRule, ParserField, ActiveSource
from services import s1_client, rule_parser
router = APIRouter()
@@ -205,63 +206,118 @@ async def load_parser_content(payload: ParserContentPayload, db: Session = Depen
return {"parser": payload.parser_name, "fields": list(fields), "field_count": len(fields)}
@router.post("/sync-sources")
async def sync_sources(days: int = 7, db: Session = Depends(get_db)):
"""Pull active dataSource.names from the SDL and store them."""
from datetime import datetime, timedelta
now = datetime.utcnow()
from_dt = (now - timedelta(days=days)).strftime("%Y-%m-%dT%H:%M:%S.000Z")
to_dt = now.strftime("%Y-%m-%dT%H:%M:%S.000Z")
try:
result = await s1_client.run_powerquery(
"| group events=count() by dataSource.name | sort -events | limit 200",
from_dt, to_dt
)
except Exception as e:
raise HTTPException(502, f"PowerQuery error: {e}")
rows = result.get("events", [])
# Clear old and insert fresh
db.query(ActiveSource).delete()
synced_at = datetime.utcnow()
seen = 0
for row in rows:
name = row.get("dataSource.name")
if name:
db.add(ActiveSource(
source_name=name,
event_count=row.get("events", 0),
synced_at=synced_at,
))
seen += 1
db.commit()
return {"synced": seen, "sources": [r["dataSource.name"] for r in rows if r.get("dataSource.name")]}
@router.get("/map")
def get_coverage_map(db: Session = Depends(get_db)):
"""Return coverage analysis: parser fields vs rule fields."""
rules = db.query(ParsedRule).all()
"""
Source-centric coverage map.
For each active dataSource.name in the SDL:
- covered = a parser is loaded for it
- parser_needed = no parser loaded
Also surfaces which STAR rules reference each source.
"""
active_sources = db.query(ActiveSource).order_by(ActiveSource.event_count.desc()).all()
parser_fields_rows = db.query(ParserField).all()
rules = db.query(ParsedRule).all()
# field → list of rules using it + data sources referenced by those rules
rule_field_index: dict[str, list] = {}
rule_ds_index: dict[str, set] = {} # field → set of dataSource.name values
# parser_name → set of field names
parser_index: dict[str, set] = {}
for pf in parser_fields_rows:
parser_index.setdefault(pf.parser_name, set()).add(pf.field_name)
# Build a fuzzy match: dataSource.name → parser_name
# Parser names like "paloalto", "palo", "okta_authentication-latest" need to match
# "Palo Alto Networks Firewall", "Okta", etc.
def _find_parser(source_name: str) -> str | None:
sn = source_name.lower().replace(" ", "").replace("-", "").replace("_", "")
for pname in parser_index:
pn = pname.lower().replace(" ", "").replace("-", "").replace("_", "")
# Direct substring match in either direction
if pn in sn or sn in pn:
return pname
return None
# Build rule index: source_name → rules that reference it
rule_by_source: dict[str, list] = {}
for rule in rules:
query_texts = _star_query_texts(json.loads(rule.raw)) if rule.rule_type == "star" else []
data_sources = rule_parser.extract_data_sources(query_texts)
for field in rule.fields_used or []:
rule_field_index.setdefault(field, []).append(
{"rule": rule.name, "type": rule.rule_type}
)
rule_ds_index.setdefault(field, set()).update(data_sources)
for ds in data_sources:
rule_by_source.setdefault(ds, []).append({"rule": rule.name, "type": rule.rule_type})
if not data_sources:
# Rule with no explicit source filter — applies to all
rule_by_source.setdefault("__any__", []).append({"rule": rule.name, "type": rule.rule_type})
# field → parser name
parser_field_index: dict[str, str] = {
pf.field_name: pf.parser_name for pf in parser_fields_rows
}
sources_out = []
covered_count = 0
needed_count = 0
all_fields = set(rule_field_index) | set(parser_field_index)
for src in active_sources:
matched_parser = _find_parser(src.source_name)
status = "covered" if matched_parser else "parser_needed"
if status == "covered":
covered_count += 1
else:
needed_count += 1
detail = {}
for f in all_fields:
in_parser = f in parser_field_index
in_rules = f in rule_field_index
detail[f] = {
"in_parser": in_parser,
"parser_name": parser_field_index.get(f),
"data_sources": sorted(rule_ds_index.get(f, set())),
"rule_count": len(rule_field_index.get(f, [])),
"rules": rule_field_index.get(f, []),
"status": (
"covered" if in_parser and in_rules
else "unused" if in_parser and not in_rules
else "missing_parser"
),
}
rules_for_src = rule_by_source.get(src.source_name, []) + rule_by_source.get("__any__", [])
parsed_unused = [f for f, d in detail.items() if d["status"] == "unused"]
missing_parser = [f for f, d in detail.items() if d["status"] == "missing_parser"]
covered = [f for f, d in detail.items() if d["status"] == "covered"]
sources_out.append({
"source_name": src.source_name,
"event_count": src.event_count,
"status": status,
"parser": matched_parser,
"parser_fields": len(parser_index.get(matched_parser, set())) if matched_parser else 0,
"rules": rules_for_src,
"rule_count": len(rules_for_src),
"synced_at": src.synced_at.isoformat() if src.synced_at else None,
})
synced_at = active_sources[0].synced_at.isoformat() if active_sources else None
return {
"summary": {
"total_parser_fields": len(parser_field_index),
"total_rule_fields": len(rule_field_index),
"covered": len(covered),
"parsed_but_unused": len(parsed_unused),
"rules_missing_parser": len(missing_parser),
"active_sources": len(active_sources),
"covered": covered_count,
"parser_needed": needed_count,
"parsers_loaded": len(parser_index),
"rules_loaded": len(rules),
},
"parsed_but_unused": parsed_unused,
"rules_missing_parser": missing_parser,
"fields": detail,
"sources": sources_out,
"synced_at": synced_at,
"has_sources": len(active_sources) > 0,
}
+57 -40
View File
@@ -137,17 +137,16 @@ function renderCoverage() {
<div class="flex items-start justify-between mb-6">
<div>
<h1 class="text-xl font-bold text-white">Parser Coverage Map</h1>
<p class="text-sm text-gray-400 mt-1">Cross-reference SDL parser fields against STAR / Sigma rule fields</p>
<p class="text-sm text-gray-400 mt-1">For each active data source — is a parser loaded?</p>
</div>
<div class="flex gap-2 flex-wrap justify-end">
<button id="btn-sync" onclick="cvSyncSources()" class="px-3 py-1.5 text-sm bg-blue-700 hover:bg-blue-600 rounded-lg text-white">Sync Live Sources</button>
<button id="btn-star" onclick="loadStar()" class="px-3 py-1.5 text-sm bg-purple-700 hover:bg-purple-600 rounded-lg text-white">Load STAR Rules</button>
<button id="btn-sdl-parsers" onclick="loadSDLParsers()" class="px-3 py-1.5 text-sm bg-purple-700 hover:bg-purple-600 rounded-lg text-white">Load SDL Parsers</button>
<button onclick="document.getElementById('f-sigma').click()" class="px-3 py-1.5 text-sm bg-gray-700 hover:bg-gray-600 rounded-lg text-white">Upload Sigma Rules</button>
<button onclick="document.getElementById('f-parser').click()" class="px-3 py-1.5 text-sm bg-gray-700 hover:bg-gray-600 rounded-lg text-white">Upload Parser</button>
<button onclick="cvReset()" class="px-3 py-1.5 text-sm bg-red-900/60 hover:bg-red-800 rounded-lg text-red-300">Reset</button>
</div>
</div>
<input type="file" id="f-sigma" accept=".yml,.yaml" multiple class="hidden" onchange="cvUploadSigma(this.files)">
<input type="file" id="f-parser" accept=".json" class="hidden" onchange="cvUploadParser(this.files[0])">
<div id="cv-err"></div>
<div id="cv-stats"></div>
@@ -198,40 +197,54 @@ async function cvReset() {
await apiGet('/api/coverage/reset'); cvData = null; cvLoad()
}
async function cvSyncSources() {
setBtn('btn-sync', true)
document.getElementById('cv-err').innerHTML = ''
try {
const r = await apiPost('/api/coverage/sync-sources?days=7', {})
await cvLoad()
} catch(e) {
document.getElementById('cv-err').innerHTML = errBox(e.message)
} finally { setBtn('btn-sync', false, 'Sync Live Sources') }
}
async function cvLoad() {
try {
cvData = await apiGet('/api/coverage/map')
const s = cvData.summary
document.getElementById('cv-stats').innerHTML = `
<div class="grid grid-cols-5 gap-3 mb-6">
${statCard('Parser Fields', s.total_parser_fields)}
${statCard('Rule Fields', s.total_rule_fields)}
<div class="grid grid-cols-4 gap-3 mb-6">
${statCard('Active Sources', s.active_sources)}
${statCard('Covered', s.covered, 'text-emerald-400')}
${statCard('Parsed Unused', s.parsed_but_unused, 'text-yellow-400')}
${statCard('Missing Parser', s.rules_missing_parser, 'text-red-400')}
${statCard('Parser Needed', s.parser_needed, 'text-red-400')}
${statCard('Parsers Loaded', s.parsers_loaded, 'text-purple-400')}
</div>`
if (!cvData.has_sources) {
document.getElementById('cv-filters').classList.add('hidden')
document.getElementById('cv-table').innerHTML = `
<div class="bg-gray-900/50 border border-gray-800 rounded-lg p-6 text-center text-sm text-gray-500">
<p class="mb-2">No active sources synced yet.</p>
<p>Click <strong class="text-gray-300">Sync Live Sources</strong> to pull current dataSource.names from the data lake, then <strong class="text-gray-300">Load STAR Rules</strong> and <strong class="text-gray-300">Load SDL Parsers</strong> to see coverage.</p>
</div>`
return
}
const filtersEl = document.getElementById('cv-filters')
filtersEl.classList.remove('hidden')
filtersEl.innerHTML = [['all','All'],['covered','Covered'],['unused','Parsed Unused'],['missing_parser','Missing Parser']]
filtersEl.innerHTML = [['all','All'],['covered','Covered'],['parser_needed','Parser Needed']]
.map(([f,l]) => `<button onclick="cvSetFilter('${f}')" id="cvf-${f}"
class="px-3 py-1 text-xs rounded-full border border-gray-700 text-gray-400 hover:border-gray-500">${l}</button>`).join('')
cvSetFilter(cvFilter)
} catch {
document.getElementById('cv-table').innerHTML = '<p class="text-gray-600 text-sm">Load STAR rules or upload parsers to begin.</p>'
}
}
function suggestParser(field, dataSources) {
if (dataSources && dataSources.length) {
return 'Parser needed for: ' + dataSources.join(', ')
if (cvData.synced_at) {
filtersEl.innerHTML += `<span class="text-xs text-gray-600 self-center ml-2">Synced ${new Date(cvData.synced_at).toLocaleTimeString()}</span>`
}
cvSetFilter(cvFilter)
} catch(e) {
document.getElementById('cv-table').innerHTML = '<p class="text-gray-600 text-sm">Failed to load coverage data.</p>'
}
// Fallback if no dataSource.name found in rule queries
const f = field.toLowerCase()
if (f.startsWith('wineventlog')) return 'Windows Event Log (WEL) parser'
if (f.startsWith('event.')) return 'Event normalisation parser'
if (f.includes('dns')) return 'DNS log parser'
if (f.includes('process')) return 'Endpoint process parser'
return 'Custom parser needed'
}
function cvSetFilter(f) {
@@ -240,28 +253,32 @@ function cvSetFilter(f) {
const on = b.id === `cvf-${f}`
b.className = `px-3 py-1 text-xs rounded-full border transition-colors ${on ? 'bg-purple-700 border-purple-600 text-white' : 'border-gray-700 text-gray-400 hover:border-gray-500'}`
})
if (!cvData) return
const LABELS = { covered:'Covered', unused:'Parsed Unused', missing_parser:'Missing Parser' }
const STYLES = { covered:'bg-emerald-900/50 text-emerald-300 border-emerald-700', unused:'bg-yellow-900/50 text-yellow-300 border-yellow-700', missing_parser:'bg-red-900/50 text-red-300 border-red-700' }
const fields = Object.entries(cvData.fields).filter(([,d]) => f === 'all' || d.status === f)
const showSuggest = f === 'missing_parser' || f === 'all'
document.getElementById('cv-table').innerHTML = fields.length === 0
? '<p class="text-gray-600 text-sm">No fields match this filter.</p>'
if (!cvData?.sources) return
const LABELS = { covered: 'Covered', parser_needed: 'Parser Needed' }
const STYLES = { covered: 'bg-emerald-900/50 text-emerald-300 border-emerald-700', parser_needed: 'bg-red-900/50 text-red-300 border-red-700' }
const sources = cvData.sources.filter(s => f === 'all' || s.status === f)
document.getElementById('cv-table').innerHTML = sources.length === 0
? '<p class="text-gray-600 text-sm">No sources match this filter.</p>'
: `<div class="overflow-x-auto"><table class="w-full text-sm">
<thead><tr class="text-left text-gray-500 border-b border-gray-800">
<th class="pb-2 pr-4 font-medium">Field</th>
<th class="pb-2 pr-4 font-medium">Data Source</th>
<th class="pb-2 pr-4 font-medium">Events (7d)</th>
<th class="pb-2 pr-4 font-medium">Status</th>
<th class="pb-2 pr-4 font-medium">Parser / Suggestion</th>
<th class="pb-2 font-medium">Blocked rules</th>
<th class="pb-2 pr-4 font-medium">Parser</th>
<th class="pb-2 font-medium">STAR Rules</th>
</tr></thead>
<tbody>${fields.map(([field, d]) => `
<tbody>${sources.map(s => `
<tr class="border-b border-gray-800/50 hover:bg-gray-900/30">
<td class="py-2 pr-4 font-mono text-xs text-gray-200">${esc(field)}</td>
<td class="py-2 pr-4"><span class="px-2 py-0.5 rounded text-xs border ${STYLES[d.status]||''}">${LABELS[d.status]||d.status}</span></td>
<td class="py-2 pr-4 text-xs ${d.status === 'missing_parser' ? 'text-amber-400 italic' : 'text-gray-400'}">
${d.status === 'missing_parser' ? '⚠ ' + esc(suggestParser(field, d.data_sources)) : esc(d.parser_name || '—')}
<td class="py-2 pr-4 font-mono text-xs text-gray-200">${esc(s.source_name)}</td>
<td class="py-2 pr-4 text-xs text-gray-400">${(s.event_count||0).toLocaleString()}</td>
<td class="py-2 pr-4"><span class="px-2 py-0.5 rounded text-xs border ${STYLES[s.status]||''}">${LABELS[s.status]||s.status}</span></td>
<td class="py-2 pr-4 text-xs ${s.status === 'parser_needed' ? 'text-amber-400 italic' : 'text-gray-400'}">
${s.status === 'parser_needed' ? '⚠ No parser loaded' : esc(s.parser) + ' (' + s.parser_fields + ' fields)'}
</td>
<td class="py-2 text-xs text-gray-400">${d.rules?.length ? d.rules.map(r=>esc(r.rule)).join(', ') : '—'}</td>
<td class="py-2 text-xs text-gray-400">${s.rules?.length ? s.rules.map(r=>esc(r.rule)).join(', ') : '—'}</td>
</tr>`).join('')}
</tbody></table></div>`
}