Add Parser Quality page: Live Event Sampler, Field Population Rate, Parser Test Runner

- New /api/quality router with three endpoints:
  sample-events: pull raw events from a source via PowerQuery
  field-population: measure % of events with each SDL field populated;
    surfaces dataSource.name correctly (100% when filtered by it) and
    returns fields_seen_in_sample so you can see what IS being extracted
  test-parser: converts SDL $field=pattern$ format strings to Python
    named-group regex and tests against a pasted raw log line
- New "Parser Quality" nav item and page with all three tools
- Home page card added for Parser Quality
- Field population UI shows per-field colour-coded progress bars plus
  a chip list of fields actually present in the sample

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mick
2026-05-19 12:53:48 -04:00
parent 058b1e7cf1
commit 999c0f7b83
3 changed files with 457 additions and 1 deletions
+222
View File
@@ -20,6 +20,7 @@
<a href="#/" data-page="home" class="nav-link flex items-center px-3 py-2 rounded-lg text-sm cursor-pointer">Overview</a>
<a href="#/coverage" data-page="coverage" class="nav-link flex items-center px-3 py-2 rounded-lg text-sm cursor-pointer">Parser Coverage</a>
<a href="#/ingest" data-page="ingest" class="nav-link flex items-center px-3 py-2 rounded-lg text-sm cursor-pointer">Ingest Dashboard</a>
<a href="#/quality" data-page="quality" class="nav-link flex items-center px-3 py-2 rounded-lg text-sm cursor-pointer">Parser Quality</a>
<a href="#/onboarding" data-page="onboarding" class="nav-link flex items-center px-3 py-2 rounded-lg text-sm cursor-pointer">Onboarding</a>
</nav>
<div class="p-3 border-t border-gray-800">
@@ -122,6 +123,7 @@ function renderHome() {
<div class="grid grid-cols-1 md:grid-cols-3 gap-5">
${homeCard('#/coverage','Parser Coverage Map','Cross-reference SDL parser fields against STAR and Sigma rule fields. Surface parsed-but-unused fields as reduction candidates.','Open Coverage Map','from-purple-700 to-purple-900')}
${homeCard('#/ingest','Ingest Dashboard','Visualize event volume by source and type. Project monthly GB costs and simulate exclusion filters before applying them.','Open Dashboard','from-blue-700 to-blue-900')}
${homeCard('#/quality','Parser Quality','Sample live events to see which fields landed. Measure field population rates and test parser patterns against raw log lines.','Open Quality Tools','from-amber-700 to-amber-900')}
${homeCard('#/onboarding','Onboarding Accelerator','Step-by-step guide for onboarding a new log source using Claude Code directly — no API key required.','View Guide','from-emerald-700 to-emerald-900')}
</div>
</div>`)
@@ -637,6 +639,225 @@ async function saveSettings() {
} finally { setBtn('st-save', false, 'Save to .env') }
}
// ── Parser Quality ────────────────────────────────────────────────────────
/**
 * Render the Parser Quality page.
 *
 * Hands a single static HTML template to set(). The template contains three
 * cards, each wired to its handler through an inline onclick attribute:
 *   - Live Event Sampler:    inputs #qs-source / #qs-hours / #qs-limit,
 *                            button #btn-qs -> qsSample(), output #qs-result
 *   - Field Population Rate: inputs #qp-source / #qp-hours / #qp-fields,
 *                            button #btn-qp -> qpAnalyze(), output #qp-result
 *   - Parser Test Runner:    select #qt-parser, textarea #qt-log,
 *                            button #btn-qt -> qtTest(), output #qt-result
 *
 * The template has no interpolations, so nothing dynamic is inserted here.
 */
function renderQuality() {
set(`<div class="p-8 max-w-5xl space-y-6">
<div>
<h1 class="text-xl font-bold text-white">Parser Quality</h1>
<p class="text-sm text-gray-400 mt-1">Inspect live events · measure field coverage · test parser patterns</p>
</div>
<!-- Live Event Sampler -->
<div class="bg-gray-900 border border-gray-800 rounded-xl p-5">
<h2 class="text-sm font-semibold text-white mb-1">Live Event Sampler</h2>
<p class="text-xs text-gray-500 mb-4">Pull recent raw events from a source and see exactly which fields landed — and which are missing.</p>
<div class="flex gap-3 flex-wrap mb-4">
<input id="qs-source" placeholder="dataSource.name — e.g. Palo Alto Networks Firewall"
class="flex-1 min-w-60 bg-gray-800 border border-gray-700 rounded-lg px-3 py-2 text-sm text-gray-200 placeholder-gray-600 focus:outline-none focus:border-purple-600">
<select id="qs-hours" class="bg-gray-800 border border-gray-700 rounded-lg px-3 py-2 text-sm text-gray-300 focus:outline-none focus:border-purple-600">
<option value="1">Last 1h</option>
<option value="6">Last 6h</option>
<option value="24" selected>Last 24h</option>
<option value="72">Last 3d</option>
</select>
<select id="qs-limit" class="bg-gray-800 border border-gray-700 rounded-lg px-3 py-2 text-sm text-gray-300 focus:outline-none focus:border-purple-600">
<option value="10">10 events</option>
<option value="20" selected>20 events</option>
<option value="50">50 events</option>
</select>
<button onclick="qsSample()" id="btn-qs"
class="px-4 py-2 text-sm bg-purple-700 hover:bg-purple-600 rounded-lg text-white transition-colors">Sample</button>
</div>
<div id="qs-result"></div>
</div>
<!-- Field Population Rate -->
<div class="bg-gray-900 border border-gray-800 rounded-xl p-5">
<h2 class="text-sm font-semibold text-white mb-1">Field Population Rate</h2>
<p class="text-xs text-gray-500 mb-4">Sample up to 500 events and measure what % have each key field populated. Low rates flag parser extraction failures.</p>
<div class="flex gap-3 flex-wrap mb-3">
<input id="qp-source" placeholder="dataSource.name"
class="flex-1 min-w-60 bg-gray-800 border border-gray-700 rounded-lg px-3 py-2 text-sm text-gray-200 placeholder-gray-600 focus:outline-none focus:border-purple-600">
<select id="qp-hours" class="bg-gray-800 border border-gray-700 rounded-lg px-3 py-2 text-sm text-gray-300 focus:outline-none focus:border-purple-600">
<option value="1">Last 1h</option>
<option value="6">Last 6h</option>
<option value="24" selected>Last 24h</option>
<option value="72">Last 3d</option>
</select>
<button onclick="qpAnalyze()" id="btn-qp"
class="px-4 py-2 text-sm bg-purple-700 hover:bg-purple-600 rounded-lg text-white transition-colors">Analyze</button>
</div>
<div class="mb-3">
<label class="text-xs text-gray-500 block mb-1">Fields to check <span class="text-gray-600">(comma-separated)</span></label>
<input id="qp-fields" value="src.ip,dst.ip,user.name,event.type,src.process.name,src.process.cmdline,tgt.file.path,network.direction"
class="w-full bg-gray-800 border border-gray-700 rounded-lg px-3 py-2 text-xs text-gray-300 font-mono focus:outline-none focus:border-purple-600">
</div>
<div id="qp-result"></div>
</div>
<!-- Parser Test Runner -->
<div class="bg-gray-900 border border-gray-800 rounded-xl p-5">
<h2 class="text-sm font-semibold text-white mb-1">Parser Test Runner</h2>
<p class="text-xs text-gray-500 mb-4">Paste a raw log line and pick a loaded parser — see which fields the format patterns would extract without deploying anything.</p>
<div class="flex gap-3 flex-wrap mb-3">
<select id="qt-parser" class="flex-1 bg-gray-800 border border-gray-700 rounded-lg px-3 py-2 text-sm text-gray-300 focus:outline-none focus:border-purple-600">
<option value="">— select parser —</option>
</select>
<button onclick="qtTest()" id="btn-qt"
class="px-4 py-2 text-sm bg-purple-700 hover:bg-purple-600 rounded-lg text-white transition-colors">Test</button>
</div>
<textarea id="qt-log" rows="3" placeholder="Paste raw log line here…"
class="w-full bg-gray-800 border border-gray-700 rounded-lg px-3 py-2 text-xs text-gray-300 font-mono placeholder-gray-600 focus:outline-none focus:border-purple-600 mb-3"></textarea>
<div id="qt-result"></div>
</div>
</div>`)
// Called without await: the page is already on screen with only the placeholder
// option in #qt-parser; qtLoadParsers() appends the parser names when its
// request completes.
qtLoadParsers()
}
// ── Live Event Sampler ─────────────────────────────────────────────────────
/**
 * Live Event Sampler handler (bound to the #btn-qs button).
 *
 * Reads #qs-source, #qs-limit and #qs-hours, POSTs them to
 * /api/quality/sample-events and writes one of the following into #qs-result:
 * a validation error, a "no events" notice, a table with one row per event and
 * one column per field name seen in any event, or the error message of a
 * failed request.
 *
 * setBtn('btn-qs', true) is called before the request and
 * setBtn('btn-qs', false, 'Sample') in `finally`, so the button is always reset.
 *
 * @returns {Promise<void>}
 */
async function qsSample() {
  // Resolve the output node on every write and tolerate its absence: a route
  // change while the request is in flight replaces the page, and writing to a
  // missing node would throw from inside the catch handler.
  const show = (html) => {
    const box = document.getElementById('qs-result')
    if (box) box.innerHTML = html
  }
  // Nested objects/arrays would otherwise be displayed as "[object Object]".
  const toText = (v) => (v !== null && typeof v === 'object' ? JSON.stringify(v) ?? String(v) : String(v))

  const source = document.getElementById('qs-source').value.trim()
  if (!source) { show(errBox('Enter a source name.')); return }
  setBtn('btn-qs', true)
  show('<p class="text-gray-500 text-sm animate-pulse">Querying data lake…</p>')
  try {
    const limit = +document.getElementById('qs-limit').value
    const hours = +document.getElementById('qs-hours').value
    const r = await apiPost('/api/quality/sample-events', { source, limit, hours })
    const events = r.events ?? []
    if (!events.length) {
      show('<p class="text-gray-500 text-sm">No events found for this source in the selected window.</p>')
      return
    }
    // Union of field names across all events, so every row has the same columns.
    const allFields = [...new Set(events.flatMap(e => Object.keys(e)))].sort()
    const headers = allFields
      .map(f => `<th class="pb-2 px-2 text-left font-medium whitespace-nowrap text-xs">${esc(f)}</th>`)
      .join('')
    const rows = events.map(ev => {
      const cells = allFields.map(f => {
        const v = ev[f]
        // The literal string 'null' is treated as "not populated" as well.
        const empty = v === null || v === undefined || v === '' || v === 'null'
        const text = v === null || v === undefined ? '' : toText(v)
        // Full value goes in the tooltip; the cell shows at most 40 characters.
        return `<td class="py-1.5 px-2 text-xs font-mono max-w-32 truncate ${empty ? 'text-gray-700 italic' : 'text-gray-300'}" title="${esc(text)}">${empty ? '∅' : esc(text.slice(0, 40))}</td>`
      }).join('')
      return `<tr class="border-b border-gray-800/40 hover:bg-gray-800/20">${cells}</tr>`
    }).join('')
    // Fall back to locally known values when the response omits count/hours,
    // and escape them like every other server-supplied value.
    const count = esc(String(r.count ?? events.length))
    const windowHours = esc(String(r.hours ?? hours))
    show(`
      <p class="text-xs text-gray-500 mb-2">${count} events · ${windowHours}h window · ${allFields.length} fields seen</p>
      <div class="overflow-x-auto max-h-72 overflow-y-auto rounded border border-gray-800">
        <table class="text-xs min-w-full">
          <thead class="sticky top-0 bg-gray-900 text-gray-500 border-b border-gray-800"><tr>${headers}</tr></thead>
          <tbody>${rows}</tbody>
        </table>
      </div>`)
  } catch (e) {
    show(errBox(e.message))
  } finally {
    setBtn('btn-qs', false, 'Sample')
  }
}
// ── Field Population Rate ──────────────────────────────────────────────────
/**
 * Field Population Rate handler (bound to the #btn-qp button).
 *
 * Reads #qp-source, #qp-hours and the comma-separated #qp-fields list, POSTs
 * them to /api/quality/field-population and renders into #qp-result a table
 * with one row per field (rate, colour-coded bar, populated/total counts),
 * followed by a chip list of `fields_seen_in_sample` when the response has one.
 *
 * Colour thresholds: rate >= 80 green, >= 40 amber, otherwise red.
 *
 * setBtn('btn-qp', true) is called before the request and
 * setBtn('btn-qp', false, 'Analyze') in `finally`.
 *
 * @returns {Promise<void>}
 */
async function qpAnalyze() {
  // Resolve the output node on every write and tolerate its absence: a route
  // change while the request is in flight replaces the page.
  const show = (html) => {
    const box = document.getElementById('qp-result')
    if (box) box.innerHTML = html
  }
  // Coerce a server-supplied rate to a finite number (0 when unusable).
  const toRate = (v) => {
    const n = Number(v)
    return Number.isFinite(n) ? n : 0
  }

  const source = document.getElementById('qp-source').value.trim()
  if (!source) { show(errBox('Enter a source name.')); return }
  setBtn('btn-qp', true)
  show('<p class="text-gray-500 text-sm animate-pulse">Sampling events…</p>')
  try {
    const fields = document.getElementById('qp-fields').value
      .split(',')
      .map(f => f.trim())
      .filter(Boolean)
    const hours = +document.getElementById('qp-hours').value
    const r = await apiPost('/api/quality/field-population', { source, hours, fields })

    // With nothing sampled every rate is meaningless; a wall of red 0% bars
    // would wrongly suggest extraction failures.
    if (r.total_sampled === 0) {
      show('<p class="text-gray-500 text-sm">No events found for this source in the selected window.</p>')
      return
    }

    // The summary line promises "worst coverage first", so enforce it here
    // instead of relying on response order. Array.prototype.sort is stable,
    // so ties keep the order the server sent.
    const ranked = [...r.fields].sort((a, b) => toRate(a.rate) - toRate(b.rate))
    const rows = ranked.map(f => {
      const pct = toRate(f.rate)
      // Clamp only the bar width; the printed figure stays as reported.
      const width = Math.min(100, Math.max(0, pct))
      const color = pct >= 80 ? 'bg-emerald-500' : pct >= 40 ? 'bg-amber-500' : 'bg-red-500'
      const textColor = pct >= 80 ? 'text-emerald-400' : pct >= 40 ? 'text-amber-400' : 'text-red-400'
      return `<tr class="border-b border-gray-800/40">
        <td class="py-2 pr-4 font-mono text-xs text-gray-200">${esc(f.field)}</td>
        <td class="py-2 pr-4 text-xs ${textColor} font-semibold w-16">${pct}%</td>
        <td class="py-2 pr-4 w-48">
          <div class="h-2 bg-gray-800 rounded-full overflow-hidden">
            <div class="h-full ${color} rounded-full transition-all" style="width:${width}%"></div>
          </div>
        </td>
        <td class="py-2 text-xs text-gray-600">${Number(f.populated ?? 0).toLocaleString()} / ${Number(f.total ?? 0).toLocaleString()}</td>
      </tr>`
    }).join('')

    const seen = r.fields_seen_in_sample
    const seenBlock = seen?.length ? `
      <div class="border-t border-gray-800 pt-3">
        <p class="text-xs text-gray-500 mb-1">Fields actually present in sample <span class="text-gray-600">(${seen.length} total)</span></p>
        <div class="flex flex-wrap gap-1">${seen.map(f =>
          `<span class="px-2 py-0.5 bg-gray-800 rounded text-xs font-mono text-gray-400">${esc(f)}</span>`).join('')}
        </div>
      </div>` : ''

    show(`
      <p class="text-xs text-gray-500 mb-3">${esc(String(r.total_sampled))} events sampled · ${esc(String(r.hours ?? hours))}h window — sorted by worst coverage first</p>
      <table class="w-full mb-4">
        <thead><tr class="text-left text-gray-500 border-b border-gray-800">
          <th class="pb-2 pr-4 text-xs font-medium">Field</th>
          <th class="pb-2 pr-4 text-xs font-medium">Rate</th>
          <th class="pb-2 pr-4 text-xs font-medium">Coverage</th>
          <th class="pb-2 text-xs font-medium">Events</th>
        </tr></thead>
        <tbody>${rows}</tbody>
      </table>
      ${seenBlock}`)
  } catch (e) {
    show(errBox(e.message))
  } finally {
    setBtn('btn-qp', false, 'Analyze')
  }
}
// ── Parser Test Runner ─────────────────────────────────────────────────────
/**
 * Populate the #qt-parser dropdown of the Parser Test Runner.
 *
 * Fetches /api/coverage/map, collects the distinct non-empty `parser` values
 * from `sources`, sorts them and appends one <option> per name. Names already
 * present in the dropdown are skipped, so overlapping calls (for example when
 * the page is re-rendered before an earlier request finished) cannot produce
 * duplicate entries.
 *
 * Loading is best-effort: on failure the dropdown keeps whatever it had and a
 * warning is logged to the console; nothing is thrown to the caller.
 *
 * @returns {Promise<void>}
 */
async function qtLoadParsers() {
  try {
    const r = await apiGet('/api/coverage/map')
    const names = [...new Set((r.sources || []).map(s => s.parser).filter(Boolean))].sort()
    // Looked up after the await: the page may have been replaced meanwhile.
    const sel = document.getElementById('qt-parser')
    if (!sel) return
    const present = new Set(Array.from(sel.options ?? [], o => o.value))
    for (const n of names) {
      if (present.has(n)) continue
      const o = document.createElement('option')
      o.value = n
      o.textContent = n
      sel.appendChild(o)
    }
  } catch (err) {
    // Still non-fatal, but no longer invisible.
    console.warn('Parser Quality: could not load parser list', err)
  }
}
/**
 * Parser Test Runner handler (bound to the #btn-qt button).
 *
 * Takes the parser chosen in #qt-parser and the trimmed text of #qt-log,
 * POSTs them to /api/quality/test-parser as { parser_name, log_line } and
 * writes the outcome into #qt-result: a validation error, a "no match"
 * warning, a table of extracted field/value pairs under the matched format
 * string, or the message of a failed request.
 *
 * setBtn('btn-qt', true) is called before the request and
 * setBtn('btn-qt', false, 'Test') in `finally`.
 *
 * @returns {Promise<void>}
 */
async function qtTest() {
  // Resolved afresh for every write, exactly like a direct getElementById call.
  const resultBox = () => document.getElementById('qt-result')
  const parserName = document.getElementById('qt-parser').value
  const logLine = document.getElementById('qt-log').value.trim()

  if (!parserName) {
    resultBox().innerHTML = errBox('Select a parser.')
    return
  }
  if (!logLine) {
    resultBox().innerHTML = errBox('Paste a log line.')
    return
  }

  setBtn('btn-qt', true)
  resultBox().innerHTML = '<p class="text-gray-500 text-sm animate-pulse">Testing…</p>'
  try {
    const outcome = await apiPost('/api/quality/test-parser', { parser_name: parserName, log_line: logLine })
    if (outcome.matched) {
      // One <tr> per extracted field; markup lines are joined with '\n'.
      const tableRows = outcome.fields.map(entry => [
        '<tr class="border-b border-gray-800/40">',
        `<td class="py-1.5 pr-4 font-mono text-xs text-purple-300">${esc(entry.field)}</td>`,
        `<td class="py-1.5 font-mono text-xs text-gray-200">${esc(String(entry.value))}</td>`,
        '</tr>',
      ].join('\n')).join('')
      resultBox().innerHTML = [
        '',
        '<div class="mb-3 p-2 bg-gray-800/60 rounded text-xs text-gray-500 font-mono break-all">',
        `<span class="text-gray-600">Matched format: </span>${esc(outcome.format_matched)}`,
        '</div>',
        '<table class="w-full">',
        '<thead><tr class="text-left text-gray-500 border-b border-gray-800">',
        '<th class="pb-2 pr-4 text-xs font-medium">Field</th>',
        '<th class="pb-2 text-xs font-medium">Extracted Value</th>',
        '</tr></thead>',
        `<tbody>${tableRows}</tbody>`,
        '</table>',
      ].join('\n')
    } else {
      // No format string produced a match: explain the likely causes.
      resultBox().innerHTML = [
        '',
        '<div class="p-3 bg-amber-900/30 border border-amber-700/50 rounded-lg text-sm text-amber-300">',
        '⚠ No format pattern matched this log line.',
        `<p class="text-xs text-amber-500 mt-1">The parser's format strings didn't produce a match. Check that the log sample matches the expected format, or that the parser has SDL format strings (some parsers use grok/dottedJson which aren't tested here).</p>`,
        '</div>',
      ].join('\n')
    }
  } catch (err) {
    resultBox().innerHTML = errBox(err.message)
  } finally {
    setBtn('btn-qt', false, 'Test')
  }
}
// ── Router ────────────────────────────────────────────────────────────────
/**
 * Replace the contents of the #main element with the given markup.
 *
 * @param {string} html - Markup assigned to the element's innerHTML.
 */
function set(html) {
  const main = document.getElementById('main')
  main.innerHTML = html
}
@@ -652,6 +873,7 @@ function route() {
const h = location.hash || '#/'
if (h === '#/coverage') { updateNav('coverage'); renderCoverage() }
else if (h === '#/ingest') { updateNav('ingest'); renderIngest() }
else if (h === '#/quality') { updateNav('quality'); renderQuality() }
else if (h === '#/onboarding') { updateNav('onboarding'); renderOnboarding() }
else if (h === '#/settings') { updateNav('settings'); renderSettings() }
else { updateNav('home'); renderHome() }