Rewrite coverage map as source-centric view

Previously showed field-level coverage (rule fields vs parser fields).
Now shows per-dataSource.name coverage: is a parser loaded for each
active ingest source?

- New ActiveSource DB model stores live sources from SDL
- New POST /api/coverage/sync-sources endpoint runs PowerQuery to fetch
  current dataSource.names and their event counts, stores in DB
- GET /api/coverage/map now returns per-source status:
    covered       = a loaded parser matches this source name
    parser_needed = source is ingesting but no parser is loaded
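- Parser matching is a case-insensitive substring match in either direction, after stripping spaces, hyphens and underscores (so parser "palo" matches source "Palo Alto Networks Firewall")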
- Coverage table shows: source name, 7d event count, status, matched parser + field count, STAR rules
- Frontend: new "Sync Live Sources" button, updated stats cards, updated filter tabs
- Removed field-level view (was confusing — parser_needed on a field ≠ missing parser for a source)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mick
2026-05-19 12:31:48 -04:00
parent 2262892859
commit f0bd56aee8
3 changed files with 164 additions and 83 deletions
+8
View File
@@ -30,6 +30,14 @@ class ParserField(Base):
field_type = Column(String) field_type = Column(String)
class ActiveSource(Base):
    """A data source (``dataSource.name``) recorded by the last source sync.

    One row per distinct source name. The sync endpoint deletes every row and
    re-inserts the current set, so the table always reflects a single sync.
    """

    __tablename__ = "active_sources"

    id = Column(Integer, primary_key=True)
    # The dataSource.name value; unique, so it acts as the natural key.
    # NOT NULL because the coverage map lower-cases this value when matching
    # parsers, and a NULL row would make that request fail.
    source_name = Column(String, unique=True, index=True, nullable=False)
    # Number of events counted for this source over the synced time window.
    # NOTE(review): Integer is 32-bit on some backends; very busy sources may
    # need BigInteger -- confirm against the target database.
    event_count = Column(Integer, default=0)
    # When this row was written; naive UTC (datetime.utcnow).
    synced_at = Column(DateTime, default=datetime.utcnow)
class IngestSnapshot(Base): class IngestSnapshot(Base):
__tablename__ = "ingest_snapshots" __tablename__ = "ingest_snapshots"
id = Column(Integer, primary_key=True) id = Column(Integer, primary_key=True)
+99 -43
View File
@@ -2,7 +2,8 @@ import json
from fastapi import APIRouter, UploadFile, File, Depends, HTTPException from fastapi import APIRouter, UploadFile, File, Depends, HTTPException
from pydantic import BaseModel from pydantic import BaseModel
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from db import get_db, ParsedRule, ParserField from datetime import datetime
from db import get_db, ParsedRule, ParserField, ActiveSource
from services import s1_client, rule_parser from services import s1_client, rule_parser
router = APIRouter() router = APIRouter()
@@ -205,63 +206,118 @@ async def load_parser_content(payload: ParserContentPayload, db: Session = Depen
return {"parser": payload.parser_name, "fields": list(fields), "field_count": len(fields)} return {"parser": payload.parser_name, "fields": list(fields), "field_count": len(fields)}
@router.post("/sync-sources")
async def sync_sources(days: int = 7, db: Session = Depends(get_db)):
    """Pull active dataSource.names from the SDL and store them.

    Runs a PowerQuery that counts events per ``dataSource.name`` over the last
    ``days`` days, then replaces the contents of the ``active_sources`` table
    with the result.

    Args:
        days: Size of the look-back window in days; must be at least 1.
        db: Database session.

    Returns:
        ``{"synced": <number of sources stored>, "sources": [<names>]}``, with
        names in the order the query returned them.

    Raises:
        HTTPException: 400 if ``days`` is not a usable window size, 502 if the
            PowerQuery call fails.
    """
    from datetime import datetime, timedelta

    if days < 1:
        raise HTTPException(400, "days must be at least 1")

    now = datetime.utcnow()
    try:
        window_start = now - timedelta(days=days)
    except OverflowError as e:
        # An absurdly large `days` overflows timedelta / datetime arithmetic.
        raise HTTPException(400, "days is too large") from e

    time_format = "%Y-%m-%dT%H:%M:%S.000Z"
    from_dt = window_start.strftime(time_format)
    to_dt = now.strftime(time_format)

    try:
        result = await s1_client.run_powerquery(
            "| group events=count() by dataSource.name | sort -events | limit 200",
            from_dt, to_dt
        )
    except Exception as e:
        raise HTTPException(502, f"PowerQuery error: {e}") from e

    # Tolerate an "events" key that is present but null.
    rows = result.get("events") or []

    # Collapse rows by name: source_name is unique in the table, so a repeated
    # name would otherwise abort the whole commit. Dict order keeps the
    # query's ordering for the response.
    counts: dict[str, int] = {}
    for row in rows:
        name = row.get("dataSource.name")
        if not name:
            continue
        try:
            events = int(row.get("events") or 0)
        except (TypeError, ValueError):
            events = 0
        counts[name] = counts.get(name, 0) + events

    synced_at = datetime.utcnow()
    try:
        # Clear old and insert fresh
        db.query(ActiveSource).delete()
        for name, events in counts.items():
            db.add(ActiveSource(
                source_name=name,
                event_count=events,
                synced_at=synced_at,
            ))
        db.commit()
    except Exception:
        # Leave the previous snapshot intact and the session usable.
        db.rollback()
        raise

    return {"synced": len(counts), "sources": list(counts)}
@router.get("/map") @router.get("/map")
def get_coverage_map(db: Session = Depends(get_db)): def get_coverage_map(db: Session = Depends(get_db)):
"""Return coverage analysis: parser fields vs rule fields.""" """
rules = db.query(ParsedRule).all() Source-centric coverage map.
For each active dataSource.name in the SDL:
- covered = a parser is loaded for it
- parser_needed = no parser loaded
Also surfaces which STAR rules reference each source.
"""
active_sources = db.query(ActiveSource).order_by(ActiveSource.event_count.desc()).all()
parser_fields_rows = db.query(ParserField).all() parser_fields_rows = db.query(ParserField).all()
rules = db.query(ParsedRule).all()
# field → list of rules using it + data sources referenced by those rules # parser_name → set of field names
rule_field_index: dict[str, list] = {} parser_index: dict[str, set] = {}
rule_ds_index: dict[str, set] = {} # field → set of dataSource.name values for pf in parser_fields_rows:
parser_index.setdefault(pf.parser_name, set()).add(pf.field_name)
# Build a fuzzy match: dataSource.name → parser_name
# Parser names like "paloalto", "palo", "okta_authentication-latest" need to match
# "Palo Alto Networks Firewall", "Okta", etc.
def _find_parser(source_name: str) -> str | None:
sn = source_name.lower().replace(" ", "").replace("-", "").replace("_", "")
for pname in parser_index:
pn = pname.lower().replace(" ", "").replace("-", "").replace("_", "")
# Direct substring match in either direction
if pn in sn or sn in pn:
return pname
return None
# Build rule index: source_name → rules that reference it
rule_by_source: dict[str, list] = {}
for rule in rules: for rule in rules:
query_texts = _star_query_texts(json.loads(rule.raw)) if rule.rule_type == "star" else [] query_texts = _star_query_texts(json.loads(rule.raw)) if rule.rule_type == "star" else []
data_sources = rule_parser.extract_data_sources(query_texts) data_sources = rule_parser.extract_data_sources(query_texts)
for field in rule.fields_used or []: for ds in data_sources:
rule_field_index.setdefault(field, []).append( rule_by_source.setdefault(ds, []).append({"rule": rule.name, "type": rule.rule_type})
{"rule": rule.name, "type": rule.rule_type} if not data_sources:
) # Rule with no explicit source filter — applies to all
rule_ds_index.setdefault(field, set()).update(data_sources) rule_by_source.setdefault("__any__", []).append({"rule": rule.name, "type": rule.rule_type})
# field → parser name sources_out = []
parser_field_index: dict[str, str] = { covered_count = 0
pf.field_name: pf.parser_name for pf in parser_fields_rows needed_count = 0
}
all_fields = set(rule_field_index) | set(parser_field_index) for src in active_sources:
matched_parser = _find_parser(src.source_name)
status = "covered" if matched_parser else "parser_needed"
if status == "covered":
covered_count += 1
else:
needed_count += 1
detail = {} rules_for_src = rule_by_source.get(src.source_name, []) + rule_by_source.get("__any__", [])
for f in all_fields:
in_parser = f in parser_field_index
in_rules = f in rule_field_index
detail[f] = {
"in_parser": in_parser,
"parser_name": parser_field_index.get(f),
"data_sources": sorted(rule_ds_index.get(f, set())),
"rule_count": len(rule_field_index.get(f, [])),
"rules": rule_field_index.get(f, []),
"status": (
"covered" if in_parser and in_rules
else "unused" if in_parser and not in_rules
else "missing_parser"
),
}
parsed_unused = [f for f, d in detail.items() if d["status"] == "unused"] sources_out.append({
missing_parser = [f for f, d in detail.items() if d["status"] == "missing_parser"] "source_name": src.source_name,
covered = [f for f, d in detail.items() if d["status"] == "covered"] "event_count": src.event_count,
"status": status,
"parser": matched_parser,
"parser_fields": len(parser_index.get(matched_parser, set())) if matched_parser else 0,
"rules": rules_for_src,
"rule_count": len(rules_for_src),
"synced_at": src.synced_at.isoformat() if src.synced_at else None,
})
synced_at = active_sources[0].synced_at.isoformat() if active_sources else None
return { return {
"summary": { "summary": {
"total_parser_fields": len(parser_field_index), "active_sources": len(active_sources),
"total_rule_fields": len(rule_field_index), "covered": covered_count,
"covered": len(covered), "parser_needed": needed_count,
"parsed_but_unused": len(parsed_unused), "parsers_loaded": len(parser_index),
"rules_missing_parser": len(missing_parser), "rules_loaded": len(rules),
}, },
"parsed_but_unused": parsed_unused, "sources": sources_out,
"rules_missing_parser": missing_parser, "synced_at": synced_at,
"fields": detail, "has_sources": len(active_sources) > 0,
} }
+60 -43
View File
@@ -137,17 +137,16 @@ function renderCoverage() {
<div class="flex items-start justify-between mb-6"> <div class="flex items-start justify-between mb-6">
<div> <div>
<h1 class="text-xl font-bold text-white">Parser Coverage Map</h1> <h1 class="text-xl font-bold text-white">Parser Coverage Map</h1>
<p class="text-sm text-gray-400 mt-1">Cross-reference SDL parser fields against STAR / Sigma rule fields</p> <p class="text-sm text-gray-400 mt-1">For each active data source — is a parser loaded?</p>
</div> </div>
<div class="flex gap-2 flex-wrap justify-end"> <div class="flex gap-2 flex-wrap justify-end">
<button id="btn-sync" onclick="cvSyncSources()" class="px-3 py-1.5 text-sm bg-blue-700 hover:bg-blue-600 rounded-lg text-white">Sync Live Sources</button>
<button id="btn-star" onclick="loadStar()" class="px-3 py-1.5 text-sm bg-purple-700 hover:bg-purple-600 rounded-lg text-white">Load STAR Rules</button> <button id="btn-star" onclick="loadStar()" class="px-3 py-1.5 text-sm bg-purple-700 hover:bg-purple-600 rounded-lg text-white">Load STAR Rules</button>
<button id="btn-sdl-parsers" onclick="loadSDLParsers()" class="px-3 py-1.5 text-sm bg-purple-700 hover:bg-purple-600 rounded-lg text-white">Load SDL Parsers</button> <button id="btn-sdl-parsers" onclick="loadSDLParsers()" class="px-3 py-1.5 text-sm bg-purple-700 hover:bg-purple-600 rounded-lg text-white">Load SDL Parsers</button>
<button onclick="document.getElementById('f-sigma').click()" class="px-3 py-1.5 text-sm bg-gray-700 hover:bg-gray-600 rounded-lg text-white">Upload Sigma Rules</button>
<button onclick="document.getElementById('f-parser').click()" class="px-3 py-1.5 text-sm bg-gray-700 hover:bg-gray-600 rounded-lg text-white">Upload Parser</button> <button onclick="document.getElementById('f-parser').click()" class="px-3 py-1.5 text-sm bg-gray-700 hover:bg-gray-600 rounded-lg text-white">Upload Parser</button>
<button onclick="cvReset()" class="px-3 py-1.5 text-sm bg-red-900/60 hover:bg-red-800 rounded-lg text-red-300">Reset</button> <button onclick="cvReset()" class="px-3 py-1.5 text-sm bg-red-900/60 hover:bg-red-800 rounded-lg text-red-300">Reset</button>
</div> </div>
</div> </div>
<input type="file" id="f-sigma" accept=".yml,.yaml" multiple class="hidden" onchange="cvUploadSigma(this.files)">
<input type="file" id="f-parser" accept=".json" class="hidden" onchange="cvUploadParser(this.files[0])"> <input type="file" id="f-parser" accept=".json" class="hidden" onchange="cvUploadParser(this.files[0])">
<div id="cv-err"></div> <div id="cv-err"></div>
<div id="cv-stats"></div> <div id="cv-stats"></div>
@@ -198,40 +197,54 @@ async function cvReset() {
await apiGet('/api/coverage/reset'); cvData = null; cvLoad() await apiGet('/api/coverage/reset'); cvData = null; cvLoad()
} }
// Ask the backend to sync live data sources (7-day window), then reload the
// coverage map. Any failure is shown in the #cv-err container.
async function cvSyncSources() {
  setBtn('btn-sync', true)
  document.getElementById('cv-err').innerHTML = ''
  try {
    // The response body is not used: cvLoad() re-fetches the coverage map.
    await apiPost('/api/coverage/sync-sources?days=7', {})
    await cvLoad()
  } catch (e) {
    // Non-Error throws have no .message; fall back to their string form.
    document.getElementById('cv-err').innerHTML = errBox(e?.message || String(e))
  } finally {
    setBtn('btn-sync', false, 'Sync Live Sources')
  }
}
async function cvLoad() { async function cvLoad() {
try { try {
cvData = await apiGet('/api/coverage/map') cvData = await apiGet('/api/coverage/map')
const s = cvData.summary const s = cvData.summary
document.getElementById('cv-stats').innerHTML = ` document.getElementById('cv-stats').innerHTML = `
<div class="grid grid-cols-5 gap-3 mb-6"> <div class="grid grid-cols-4 gap-3 mb-6">
${statCard('Parser Fields', s.total_parser_fields)} ${statCard('Active Sources', s.active_sources)}
${statCard('Rule Fields', s.total_rule_fields)}
${statCard('Covered', s.covered, 'text-emerald-400')} ${statCard('Covered', s.covered, 'text-emerald-400')}
${statCard('Parsed Unused', s.parsed_but_unused, 'text-yellow-400')} ${statCard('Parser Needed', s.parser_needed, 'text-red-400')}
${statCard('Missing Parser', s.rules_missing_parser, 'text-red-400')} ${statCard('Parsers Loaded', s.parsers_loaded, 'text-purple-400')}
</div>` </div>`
const filtersEl = document.getElementById('cv-filters')
filtersEl.classList.remove('hidden') if (!cvData.has_sources) {
filtersEl.innerHTML = [['all','All'],['covered','Covered'],['unused','Parsed Unused'],['missing_parser','Missing Parser']] document.getElementById('cv-filters').classList.add('hidden')
.map(([f,l]) => `<button onclick="cvSetFilter('${f}')" id="cvf-${f}" document.getElementById('cv-table').innerHTML = `
class="px-3 py-1 text-xs rounded-full border border-gray-700 text-gray-400 hover:border-gray-500">${l}</button>`).join('') <div class="bg-gray-900/50 border border-gray-800 rounded-lg p-6 text-center text-sm text-gray-500">
cvSetFilter(cvFilter) <p class="mb-2">No active sources synced yet.</p>
} catch { <p>Click <strong class="text-gray-300">Sync Live Sources</strong> to pull current dataSource.names from the data lake, then <strong class="text-gray-300">Load STAR Rules</strong> and <strong class="text-gray-300">Load SDL Parsers</strong> to see coverage.</p>
document.getElementById('cv-table').innerHTML = '<p class="text-gray-600 text-sm">Load STAR rules or upload parsers to begin.</p>' </div>`
} return
} }
function suggestParser(field, dataSources) { const filtersEl = document.getElementById('cv-filters')
if (dataSources && dataSources.length) { filtersEl.classList.remove('hidden')
return 'Parser needed for: ' + dataSources.join(', ') filtersEl.innerHTML = [['all','All'],['covered','Covered'],['parser_needed','Parser Needed']]
.map(([f,l]) => `<button onclick="cvSetFilter('${f}')" id="cvf-${f}"
class="px-3 py-1 text-xs rounded-full border border-gray-700 text-gray-400 hover:border-gray-500">${l}</button>`).join('')
if (cvData.synced_at) {
filtersEl.innerHTML += `<span class="text-xs text-gray-600 self-center ml-2">Synced ${new Date(cvData.synced_at).toLocaleTimeString()}</span>`
}
cvSetFilter(cvFilter)
} catch(e) {
document.getElementById('cv-table').innerHTML = '<p class="text-gray-600 text-sm">Failed to load coverage data.</p>'
} }
// Fallback if no dataSource.name found in rule queries
const f = field.toLowerCase()
if (f.startsWith('wineventlog')) return 'Windows Event Log (WEL) parser'
if (f.startsWith('event.')) return 'Event normalisation parser'
if (f.includes('dns')) return 'DNS log parser'
if (f.includes('process')) return 'Endpoint process parser'
return 'Custom parser needed'
} }
function cvSetFilter(f) { function cvSetFilter(f) {
@@ -240,28 +253,32 @@ function cvSetFilter(f) {
const on = b.id === `cvf-${f}` const on = b.id === `cvf-${f}`
b.className = `px-3 py-1 text-xs rounded-full border transition-colors ${on ? 'bg-purple-700 border-purple-600 text-white' : 'border-gray-700 text-gray-400 hover:border-gray-500'}` b.className = `px-3 py-1 text-xs rounded-full border transition-colors ${on ? 'bg-purple-700 border-purple-600 text-white' : 'border-gray-700 text-gray-400 hover:border-gray-500'}`
}) })
if (!cvData) return if (!cvData?.sources) return
const LABELS = { covered:'Covered', unused:'Parsed Unused', missing_parser:'Missing Parser' }
const STYLES = { covered:'bg-emerald-900/50 text-emerald-300 border-emerald-700', unused:'bg-yellow-900/50 text-yellow-300 border-yellow-700', missing_parser:'bg-red-900/50 text-red-300 border-red-700' } const LABELS = { covered: 'Covered', parser_needed: 'Parser Needed' }
const fields = Object.entries(cvData.fields).filter(([,d]) => f === 'all' || d.status === f) const STYLES = { covered: 'bg-emerald-900/50 text-emerald-300 border-emerald-700', parser_needed: 'bg-red-900/50 text-red-300 border-red-700' }
const showSuggest = f === 'missing_parser' || f === 'all'
document.getElementById('cv-table').innerHTML = fields.length === 0 const sources = cvData.sources.filter(s => f === 'all' || s.status === f)
? '<p class="text-gray-600 text-sm">No fields match this filter.</p>'
document.getElementById('cv-table').innerHTML = sources.length === 0
? '<p class="text-gray-600 text-sm">No sources match this filter.</p>'
: `<div class="overflow-x-auto"><table class="w-full text-sm"> : `<div class="overflow-x-auto"><table class="w-full text-sm">
<thead><tr class="text-left text-gray-500 border-b border-gray-800"> <thead><tr class="text-left text-gray-500 border-b border-gray-800">
<th class="pb-2 pr-4 font-medium">Field</th> <th class="pb-2 pr-4 font-medium">Data Source</th>
<th class="pb-2 pr-4 font-medium">Events (7d)</th>
<th class="pb-2 pr-4 font-medium">Status</th> <th class="pb-2 pr-4 font-medium">Status</th>
<th class="pb-2 pr-4 font-medium">Parser / Suggestion</th> <th class="pb-2 pr-4 font-medium">Parser</th>
<th class="pb-2 font-medium">Blocked rules</th> <th class="pb-2 font-medium">STAR Rules</th>
</tr></thead> </tr></thead>
<tbody>${fields.map(([field, d]) => ` <tbody>${sources.map(s => `
<tr class="border-b border-gray-800/50 hover:bg-gray-900/30"> <tr class="border-b border-gray-800/50 hover:bg-gray-900/30">
<td class="py-2 pr-4 font-mono text-xs text-gray-200">${esc(field)}</td> <td class="py-2 pr-4 font-mono text-xs text-gray-200">${esc(s.source_name)}</td>
<td class="py-2 pr-4"><span class="px-2 py-0.5 rounded text-xs border ${STYLES[d.status]||''}">${LABELS[d.status]||d.status}</span></td> <td class="py-2 pr-4 text-xs text-gray-400">${(s.event_count||0).toLocaleString()}</td>
<td class="py-2 pr-4 text-xs ${d.status === 'missing_parser' ? 'text-amber-400 italic' : 'text-gray-400'}"> <td class="py-2 pr-4"><span class="px-2 py-0.5 rounded text-xs border ${STYLES[s.status]||''}">${LABELS[s.status]||s.status}</span></td>
${d.status === 'missing_parser' ? '⚠ ' + esc(suggestParser(field, d.data_sources)) : esc(d.parser_name || '—')} <td class="py-2 pr-4 text-xs ${s.status === 'parser_needed' ? 'text-amber-400 italic' : 'text-gray-400'}">
${s.status === 'parser_needed' ? '⚠ No parser loaded' : esc(s.parser) + ' (' + s.parser_fields + ' fields)'}
</td> </td>
<td class="py-2 text-xs text-gray-400">${d.rules?.length ? d.rules.map(r=>esc(r.rule)).join(', ') : '—'}</td> <td class="py-2 text-xs text-gray-400">${s.rules?.length ? s.rules.map(r=>esc(r.rule)).join(', ') : '—'}</td>
</tr>`).join('')} </tr>`).join('')}
</tbody></table></div>` </tbody></table></div>`
} }