mirror of
https://github.com/marcredhat/SIEM-toolkit-patched
synced 2026-06-10 21:31:19 +00:00
Fix Parser Test Runner JSON mode, Filter Simulator PowerQuery (PQ) syntax, and the parser dropdown's data source
- backend/routers/quality.py
* Add GET /api/quality/parsers (lists actual files in /app/parsers)
* Support SDL JSON auto-extract parsers ($=json{parse=json}$)
* Apply parser rewrite blocks with correct $0/$N backref translation
* Accept single JSON / JSON array / NDJSON in test-parser body
* Flatten JSON inside 'message' for Field Population coverage
- backend/routers/ingest.py
* Rewrite simulate-filter PowerQuery to valid SDL syntax
* Correct field name: src.name -> dataSource.name
- frontend/index.html
* Parser dropdown loads from /api/quality/parsers
* Add 'Last 7d' lookback option
* Render JSON-mode test results with badges + payload counter
This commit is contained in:
@@ -92,12 +92,15 @@ async def simulate_filter(rule: FilterRule):
|
|||||||
|
|
||||||
clauses = []
|
clauses = []
|
||||||
if rule.source:
|
if rule.source:
|
||||||
clauses.append(f'src.name = "{rule.source}"')
|
clauses.append(f"dataSource.name=='{rule.source}'")
|
||||||
if rule.event_type:
|
if rule.event_type:
|
||||||
clauses.append(f'event.type = "{rule.event_type}"')
|
clauses.append(f"event.type=='{rule.event_type}'")
|
||||||
|
|
||||||
filter_expr = " AND ".join(clauses) if clauses else "true"
|
if clauses:
|
||||||
query = f"| filter {filter_expr} | count() as events"
|
filter_expr = " and ".join(clauses)
|
||||||
|
query = f"| filter {filter_expr} | group events=count()"
|
||||||
|
else:
|
||||||
|
query = "| group events=count()"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result = await s1_client.run_powerquery(query, from_dt, to_dt)
|
result = await s1_client.run_powerquery(query, from_dt, to_dt)
|
||||||
|
|||||||
+161
-4
@@ -2,11 +2,26 @@ from fastapi import APIRouter, HTTPException
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from services import s1_client
|
from services import s1_client
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/parsers")
def list_parser_files():
    """List parser filenames available under /app/parsers/ for the Test Runner.

    Returns:
        A dict with two keys: ``parsers`` holds the sorted names of the
        regular, non-hidden files directly inside the directory, and
        ``count`` holds how many names were found. When the directory is
        missing, or the path exists but is not a directory, the listing is
        empty rather than an error.
    """
    parsers_dir = "/app/parsers"
    try:
        # Using scandir as a context manager closes the directory handle
        # deterministically, even if an entry lookup fails mid-scan.
        with os.scandir(parsers_dir) as entries:
            names = sorted(
                entry.name
                for entry in entries
                if entry.is_file() and not entry.name.startswith(".")
            )
    except (FileNotFoundError, NotADirectoryError):
        # No usable parser directory: report an empty listing.
        names = []
    return {"parsers": names, "count": len(names)}
|
||||||
|
|
||||||
|
|
||||||
def _date_range_hours(hours: int) -> tuple[str, str]:
|
def _date_range_hours(hours: int) -> tuple[str, str]:
|
||||||
now = datetime.utcnow()
|
now = datetime.utcnow()
|
||||||
return (
|
return (
|
||||||
@@ -52,11 +67,41 @@ class TestParserRequest(BaseModel):
|
|||||||
# Helpers
|
# Helpers
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _flatten_dict(d: dict, prefix: str = "", out: dict | None = None) -> dict:
|
||||||
|
"""Recursively flatten a nested dict into dotted keys."""
|
||||||
|
if out is None:
|
||||||
|
out = {}
|
||||||
|
if not isinstance(d, dict):
|
||||||
|
return out
|
||||||
|
for k, v in d.items():
|
||||||
|
key = f"{prefix}.{k}" if prefix else k
|
||||||
|
if isinstance(v, dict):
|
||||||
|
_flatten_dict(v, key, out)
|
||||||
|
else:
|
||||||
|
out[key] = v
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
def _flatten_event(event: dict) -> dict:
    """Return a flat field→value dict from a PowerQuery result row.

    If the row carries a JSON-stringified object in `message` (i.e. the
    parser wasn't applied at query time), that payload is parsed and its
    flattened fields are merged into the result so the UI can measure field
    population accurately. The original raw `message` string is always
    preserved under its own key, even when the payload itself contains a
    top-level `message` field.

    Args:
        event: One result row. Anything that is not a dict yields ``{}``.

    Returns:
        A shallow copy of the row, extended with the dotted keys extracted
        from `message` when it holds a JSON object. Rows whose `message` is
        absent, not a string, or not valid JSON are returned unchanged.
    """
    # Local import keeps this helper self-contained; the module does not
    # import json at the top level.
    import json

    if not isinstance(event, dict):
        return {}
    flat = dict(event)
    msg = flat.get("message")
    if not isinstance(msg, str):
        return flat

    # Tolerate surrounding whitespace/newlines around the JSON object.
    candidate = msg.strip()
    if not (candidate.startswith("{") and candidate.endswith("}")):
        return flat

    try:
        parsed = json.loads(candidate)
        extracted = _flatten_dict(parsed) if isinstance(parsed, dict) else {}
    except (ValueError, RecursionError):
        # Best-effort: a malformed or pathologically nested payload simply
        # leaves the row as it was.
        return flat

    if extracted:
        flat.update(extracted)
        # The payload may define its own `message`; keep the raw string so
        # the documented contract holds.
        flat["message"] = msg
    return flat
|
||||||
|
|
||||||
|
|
||||||
def _extract_format_strings(content: str) -> list[str]:
|
def _extract_format_strings(content: str) -> list[str]:
|
||||||
@@ -204,6 +249,117 @@ async def test_parser(req: TestParserRequest):
|
|||||||
|
|
||||||
format_strings = _extract_format_strings(content)
|
format_strings = _extract_format_strings(content)
|
||||||
|
|
||||||
|
# ── JSON auto-extract path ──────────────────────────────────────────────
|
||||||
|
# SDL parsers that use `$=json{parse=json}$` (or any format containing
|
||||||
|
# `parse=json`) auto-extract every top-level JSON key as an attribute.
|
||||||
|
# The regex-based path can't model that — handle it explicitly so users
|
||||||
|
# can test JSON-shaped logs against JSON-mode parsers.
|
||||||
|
log_input = req.log_line.strip()
|
||||||
|
is_json_mode = any("parse=json" in f for f in format_strings) or log_input.startswith("{")
|
||||||
|
if is_json_mode:
|
||||||
|
import json as _json
|
||||||
|
# Support multi-line input (one JSON object per line, or a JSON array)
|
||||||
|
lines = [ln for ln in (l.strip() for l in log_input.splitlines()) if ln]
|
||||||
|
payloads: list[dict] = []
|
||||||
|
parse_errors: list[str] = []
|
||||||
|
# Single line: try direct parse; if it's a JSON array, expand.
|
||||||
|
if len(lines) == 1:
|
||||||
|
try:
|
||||||
|
obj = _json.loads(lines[0])
|
||||||
|
except Exception as e:
|
||||||
|
return {
|
||||||
|
"parser_name": req.parser_name,
|
||||||
|
"matched": False,
|
||||||
|
"message": f"Parser expects JSON but log line could not be parsed as JSON: {e}",
|
||||||
|
"fields": [],
|
||||||
|
}
|
||||||
|
if isinstance(obj, list):
|
||||||
|
payloads = [x for x in obj if isinstance(x, dict)]
|
||||||
|
elif isinstance(obj, dict):
|
||||||
|
payloads = [obj]
|
||||||
|
else:
|
||||||
|
return {
|
||||||
|
"parser_name": req.parser_name,
|
||||||
|
"matched": False,
|
||||||
|
"message": "Parser expects a JSON object (got scalar).",
|
||||||
|
"fields": [],
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
# Multi-line: one JSON object per line (NDJSON)
|
||||||
|
for i, ln in enumerate(lines, 1):
|
||||||
|
try:
|
||||||
|
obj = _json.loads(ln)
|
||||||
|
if isinstance(obj, dict):
|
||||||
|
payloads.append(obj)
|
||||||
|
else:
|
||||||
|
parse_errors.append(f"line {i}: not a JSON object")
|
||||||
|
except Exception as e:
|
||||||
|
parse_errors.append(f"line {i}: {e}")
|
||||||
|
|
||||||
|
if not payloads:
|
||||||
|
return {
|
||||||
|
"parser_name": req.parser_name,
|
||||||
|
"matched": False,
|
||||||
|
"message": "No valid JSON objects found. " + " | ".join(parse_errors[:3]),
|
||||||
|
"fields": [],
|
||||||
|
}
|
||||||
|
|
||||||
|
# Use the first payload for the detail table; report totals.
|
||||||
|
payload = payloads[0]
|
||||||
|
extracted = _flatten_dict(payload)
|
||||||
|
# Apply lightweight rewrites if present (input/output/match/replace blocks).
|
||||||
|
# We only handle simple literal/regex matches with $0 or string replacements;
|
||||||
|
# this is best-effort, intended for quick visual verification.
|
||||||
|
rewrites_applied = []
|
||||||
|
rewrite_re = re.compile(
|
||||||
|
r'\{\s*input:\s*"([^"]+)"\s*,\s*output:\s*"([^"]+)"\s*,\s*match:\s*"((?:[^"\\]|\\.)*)"\s*,\s*replace:\s*"((?:[^"\\]|\\.)*)"\s*\}',
|
||||||
|
re.DOTALL,
|
||||||
|
)
|
||||||
|
derived: dict[str, str] = {}
|
||||||
|
for m in rewrite_re.finditer(content):
|
||||||
|
in_field, out_field, match_pat, replace_val = m.group(1), m.group(2), m.group(3), m.group(4)
|
||||||
|
src_val = extracted.get(in_field)
|
||||||
|
if src_val is None:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
m2 = re.search(match_pat, str(src_val))
|
||||||
|
except re.error:
|
||||||
|
continue
|
||||||
|
if not m2:
|
||||||
|
continue
|
||||||
|
# SDL uses $0 for whole match, $1.. for groups. Translate to Python
|
||||||
|
# \g<0>, \g<1>, ... so re.sub doesn't read \0 as a null byte.
|
||||||
|
def _to_py_backref(s: str) -> str:
|
||||||
|
return re.sub(r"\$(\d+)", lambda mm: f"\\g<{mm.group(1)}>", s)
|
||||||
|
try:
|
||||||
|
val = re.sub(match_pat, _to_py_backref(replace_val), str(src_val), count=1)
|
||||||
|
except re.error:
|
||||||
|
val = replace_val
|
||||||
|
derived[out_field] = val
|
||||||
|
rewrites_applied.append({
|
||||||
|
"input": in_field, "input_value": src_val,
|
||||||
|
"output": out_field, "matched_on": match_pat, "result": val,
|
||||||
|
})
|
||||||
|
|
||||||
|
fields = (
|
||||||
|
[{"field": k, "value": v, "source": "json-extract"} for k, v in sorted(extracted.items())]
|
||||||
|
+ [{"field": k, "value": v, "source": "rewrite"} for k, v in sorted(derived.items())]
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"parser_name": req.parser_name,
|
||||||
|
"matched": True,
|
||||||
|
"mode": "json",
|
||||||
|
"format_matched": "$=json{parse=json}$",
|
||||||
|
"fields": fields,
|
||||||
|
"rewrites_applied": rewrites_applied,
|
||||||
|
"extracted_count": len(extracted),
|
||||||
|
"derived_count": len(derived),
|
||||||
|
"payload_count": len(payloads),
|
||||||
|
"parse_errors": parse_errors,
|
||||||
|
"showing_payload": 1,
|
||||||
|
}
|
||||||
|
|
||||||
|
# ── Regex format-string path (original) ─────────────────────────────────
|
||||||
for fmt in format_strings:
|
for fmt in format_strings:
|
||||||
try:
|
try:
|
||||||
compiled, py_to_sdl = _sdl_format_to_regex(fmt)
|
compiled, py_to_sdl = _sdl_format_to_regex(fmt)
|
||||||
@@ -221,6 +377,7 @@ async def test_parser(req: TestParserRequest):
|
|||||||
return {
|
return {
|
||||||
"parser_name": req.parser_name,
|
"parser_name": req.parser_name,
|
||||||
"matched": True,
|
"matched": True,
|
||||||
|
"mode": "regex",
|
||||||
"format_matched": fmt,
|
"format_matched": fmt,
|
||||||
"fields": fields,
|
"fields": fields,
|
||||||
}
|
}
|
||||||
|
|||||||
+51
-12
@@ -696,6 +696,7 @@ function renderQuality() {
|
|||||||
<option value="6">Last 6h</option>
|
<option value="6">Last 6h</option>
|
||||||
<option value="24" selected>Last 24h</option>
|
<option value="24" selected>Last 24h</option>
|
||||||
<option value="72">Last 3d</option>
|
<option value="72">Last 3d</option>
|
||||||
|
<option value="168">Last 7d</option>
|
||||||
</select>
|
</select>
|
||||||
<select id="qs-limit" class="bg-gray-800 border border-gray-700 rounded-lg px-3 py-2 text-sm text-gray-300 focus:outline-none focus:border-purple-600">
|
<select id="qs-limit" class="bg-gray-800 border border-gray-700 rounded-lg px-3 py-2 text-sm text-gray-300 focus:outline-none focus:border-purple-600">
|
||||||
<option value="10" selected>10 events</option>
|
<option value="10" selected>10 events</option>
|
||||||
@@ -721,6 +722,7 @@ function renderQuality() {
|
|||||||
<option value="6">Last 6h</option>
|
<option value="6">Last 6h</option>
|
||||||
<option value="24" selected>Last 24h</option>
|
<option value="24" selected>Last 24h</option>
|
||||||
<option value="72">Last 3d</option>
|
<option value="72">Last 3d</option>
|
||||||
|
<option value="168">Last 7d</option>
|
||||||
</select>
|
</select>
|
||||||
<button onclick="qpAnalyze()" id="btn-qp"
|
<button onclick="qpAnalyze()" id="btn-qp"
|
||||||
class="px-4 py-2 text-sm bg-purple-700 hover:bg-purple-600 rounded-lg text-white transition-colors">Analyze</button>
|
class="px-4 py-2 text-sm bg-purple-700 hover:bg-purple-600 rounded-lg text-white transition-colors">Analyze</button>
|
||||||
@@ -911,12 +913,21 @@ async function qtLoadParsers() {
|
|||||||
if (qsSel) qsSel.innerHTML = sourcePlaceholder + sourceOptions
|
if (qsSel) qsSel.innerHTML = sourcePlaceholder + sourceOptions
|
||||||
if (qpSel) qpSel.innerHTML = sourcePlaceholder + sourceOptions
|
if (qpSel) qpSel.innerHTML = sourcePlaceholder + sourceOptions
|
||||||
|
|
||||||
// Populate parser dropdown
|
// Populate parser dropdown from /app/parsers/ directory (not from coverage map)
|
||||||
const qtSel = document.getElementById('qt-parser')
|
const qtSel = document.getElementById('qt-parser')
|
||||||
if (qtSel) {
|
if (qtSel) {
|
||||||
parserNames.forEach(n => {
|
try {
|
||||||
const o = document.createElement('option'); o.value = n; o.textContent = n; qtSel.appendChild(o)
|
const p = await apiGet('/api/quality/parsers')
|
||||||
})
|
qtSel.innerHTML = '<option value="">— select parser —</option>'
|
||||||
|
;(p.parsers || []).forEach(n => {
|
||||||
|
const o = document.createElement('option'); o.value = n; o.textContent = n; qtSel.appendChild(o)
|
||||||
|
})
|
||||||
|
if (!p.parsers || p.parsers.length === 0) {
|
||||||
|
qtSel.innerHTML = '<option value="">— no parser files in /app/parsers — drop JSON files there or click "Load SDL Parsers" —</option>'
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
qtSel.innerHTML = '<option value="">— could not load parsers: ' + esc(err.message || err) + ' —</option>'
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} catch(e) {
|
} catch(e) {
|
||||||
// If no sources synced yet, fall back to empty state with hint
|
// If no sources synced yet, fall back to empty state with hint
|
||||||
@@ -940,26 +951,54 @@ async function qtTest() {
|
|||||||
if (!r.matched) {
|
if (!r.matched) {
|
||||||
document.getElementById('qt-result').innerHTML = `
|
document.getElementById('qt-result').innerHTML = `
|
||||||
<div class="p-3 bg-amber-900/30 border border-amber-700/50 rounded-lg text-sm text-amber-300">
|
<div class="p-3 bg-amber-900/30 border border-amber-700/50 rounded-lg text-sm text-amber-300">
|
||||||
⚠ No format pattern matched this log line.
|
⚠ ${esc(r.message || 'No format pattern matched this log line.')}
|
||||||
<p class="text-xs text-amber-500 mt-1">The parser's format strings didn't produce a match. Check that the log sample matches the expected format, or that the parser has SDL format strings (some parsers use grok/dottedJson which aren't tested here).</p>
|
<p class="text-xs text-amber-500 mt-1">The parser's format strings didn't produce a match. Check that the log sample matches the expected format, or that the parser uses grok/dottedJson which aren't tested here.</p>
|
||||||
</div>`
|
</div>`
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
const rows = r.fields.map(f => `<tr class="border-b border-gray-800/40">
|
const extracts = (r.fields || []).filter(f => f.source !== 'rewrite')
|
||||||
|
const rewrites = (r.fields || []).filter(f => f.source === 'rewrite')
|
||||||
|
const rowsExtract = extracts.map(f => `<tr class="border-b border-gray-800/40">
|
||||||
<td class="py-1.5 pr-4 font-mono text-xs text-purple-300">${esc(f.field)}</td>
|
<td class="py-1.5 pr-4 font-mono text-xs text-purple-300">${esc(f.field)}</td>
|
||||||
<td class="py-1.5 font-mono text-xs text-gray-200">${esc(String(f.value))}</td>
|
<td class="py-1.5 font-mono text-xs text-gray-200">${esc(String(f.value))}</td>
|
||||||
</tr>`).join('')
|
</tr>`).join('')
|
||||||
|
const rowsRewrite = rewrites.map(f => `<tr class="border-b border-gray-800/40">
|
||||||
|
<td class="py-1.5 pr-4 font-mono text-xs text-emerald-300">${esc(f.field)}</td>
|
||||||
|
<td class="py-1.5 font-mono text-xs text-gray-200">${esc(String(f.value))}</td>
|
||||||
|
</tr>`).join('')
|
||||||
|
const modeBadge = r.mode === 'json'
|
||||||
|
? '<span class="px-2 py-0.5 ml-2 text-xs rounded bg-purple-900/60 border border-purple-700 text-purple-300">JSON auto-extract</span>'
|
||||||
|
: '<span class="px-2 py-0.5 ml-2 text-xs rounded bg-blue-900/60 border border-blue-700 text-blue-300">regex format</span>'
|
||||||
|
const counts = r.mode === 'json'
|
||||||
|
? `<span class="text-gray-500">${r.extracted_count} extracted · ${r.derived_count} rewritten` +
|
||||||
|
(r.payload_count > 1 ? ` · showing payload ${r.showing_payload}/${r.payload_count}` : '') +
|
||||||
|
`</span>` : ''
|
||||||
|
const parseWarn = (r.parse_errors && r.parse_errors.length)
|
||||||
|
? `<div class="mt-2 p-2 bg-amber-900/30 border border-amber-700/50 rounded text-xs text-amber-300">
|
||||||
|
${r.parse_errors.length} line(s) skipped: ${r.parse_errors.slice(0,3).map(esc).join(' | ')}${r.parse_errors.length>3?' …':''}
|
||||||
|
</div>` : ''
|
||||||
document.getElementById('qt-result').innerHTML = `
|
document.getElementById('qt-result').innerHTML = `
|
||||||
<div class="mb-3 p-2 bg-gray-800/60 rounded text-xs text-gray-500 font-mono break-all">
|
<div class="mb-3 p-2 bg-gray-800/60 rounded text-xs text-gray-500 font-mono break-all">
|
||||||
<span class="text-gray-600">Matched format: </span>${esc(r.format_matched)}
|
<span class="text-gray-600">Matched format: </span>${esc(r.format_matched)} ${modeBadge}
|
||||||
|
<div class="mt-1">${counts}</div>
|
||||||
|
${parseWarn}
|
||||||
</div>
|
</div>
|
||||||
|
<table class="w-full mb-4">
|
||||||
|
<thead><tr class="text-left text-gray-500 border-b border-gray-800">
|
||||||
|
<th class="pb-2 pr-4 text-xs font-medium">Extracted Field</th>
|
||||||
|
<th class="pb-2 text-xs font-medium">Value</th>
|
||||||
|
</tr></thead>
|
||||||
|
<tbody>${rowsExtract}</tbody>
|
||||||
|
</table>
|
||||||
|
${rewrites.length ? `
|
||||||
|
<h4 class="text-xs font-semibold text-emerald-300 mb-2">Derived (rewrites applied — ${rewrites.length})</h4>
|
||||||
<table class="w-full">
|
<table class="w-full">
|
||||||
<thead><tr class="text-left text-gray-500 border-b border-gray-800">
|
<thead><tr class="text-left text-gray-500 border-b border-gray-800">
|
||||||
<th class="pb-2 pr-4 text-xs font-medium">Field</th>
|
<th class="pb-2 pr-4 text-xs font-medium">Output Field</th>
|
||||||
<th class="pb-2 text-xs font-medium">Extracted Value</th>
|
<th class="pb-2 text-xs font-medium">Value</th>
|
||||||
</tr></thead>
|
</tr></thead>
|
||||||
<tbody>${rows}</tbody>
|
<tbody>${rowsRewrite}</tbody>
|
||||||
</table>`
|
</table>` : ''}`
|
||||||
} catch(e) {
|
} catch(e) {
|
||||||
document.getElementById('qt-result').innerHTML = errBox(e.message)
|
document.getElementById('qt-result').innerHTML = errBox(e.message)
|
||||||
} finally { setBtn('btn-qt', false, 'Test') }
|
} finally { setBtn('btn-qt', false, 'Test') }
|
||||||
|
|||||||
Reference in New Issue
Block a user