diff --git a/backend/routers/ingest.py b/backend/routers/ingest.py
index 5b03f9a..a665731 100644
--- a/backend/routers/ingest.py
+++ b/backend/routers/ingest.py
@@ -92,12 +92,15 @@ async def simulate_filter(rule: FilterRule):
clauses = []
if rule.source:
- clauses.append(f'src.name = "{rule.source}"')
+ clauses.append(f"dataSource.name=='{rule.source}'")
if rule.event_type:
- clauses.append(f'event.type = "{rule.event_type}"')
+ clauses.append(f"event.type=='{rule.event_type}'")
- filter_expr = " AND ".join(clauses) if clauses else "true"
- query = f"| filter {filter_expr} | count() as events"
+ if clauses:
+ filter_expr = " and ".join(clauses)
+ query = f"| filter {filter_expr} | group events=count()"
+ else:
+ query = "| group events=count()"
try:
result = await s1_client.run_powerquery(query, from_dt, to_dt)
diff --git a/backend/routers/quality.py b/backend/routers/quality.py
index 7b266b7..3e3f8ae 100644
--- a/backend/routers/quality.py
+++ b/backend/routers/quality.py
@@ -2,11 +2,26 @@ from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from datetime import datetime, timedelta
from services import s1_client
+import os
import re
router = APIRouter()
+@router.get("/parsers")
+def list_parser_files():
+ """List parser filenames available under /app/parsers/ for the Test Runner."""
+ parsers_dir = "/app/parsers"
+ try:
+ names = sorted(
+ e.name for e in os.scandir(parsers_dir)
+ if e.is_file() and not e.name.startswith(".")
+ )
+ except FileNotFoundError:
+ names = []
+ return {"parsers": names, "count": len(names)}
+
+
def _date_range_hours(hours: int) -> tuple[str, str]:
now = datetime.utcnow()
return (
@@ -52,11 +67,41 @@ class TestParserRequest(BaseModel):
# Helpers
# ---------------------------------------------------------------------------
+def _flatten_dict(d: dict, prefix: str = "", out: dict | None = None) -> dict:
+ """Recursively flatten a nested dict into dotted keys."""
+ if out is None:
+ out = {}
+ if not isinstance(d, dict):
+ return out
+ for k, v in d.items():
+ key = f"{prefix}.{k}" if prefix else k
+ if isinstance(v, dict):
+ _flatten_dict(v, key, out)
+ else:
+ out[key] = v
+ return out
+
+
def _flatten_event(event: dict) -> dict:
- """Return a flat field→value dict from a PowerQuery result row."""
- if isinstance(event, dict):
- return {k: v for k, v in event.items()}
- return {}
+ """Return a flat field→value dict from a PowerQuery result row.
+
+ If the row only carries a JSON-stringified payload in `message` (i.e. the
+ parser wasn't applied at query time), parse and flatten it inline so the
+ UI can measure field population accurately. The original raw `message`
+ is preserved under its own key.
+ """
+ if not isinstance(event, dict):
+ return {}
+ flat = dict(event)
+ msg = flat.get("message")
+ if isinstance(msg, str) and msg.startswith("{") and msg.endswith("}"):
+ try:
+ parsed = __import__("json").loads(msg)
+ if isinstance(parsed, dict):
+ flat.update(_flatten_dict(parsed))
+ except Exception:
+ pass
+ return flat
def _extract_format_strings(content: str) -> list[str]:
@@ -204,6 +249,117 @@ async def test_parser(req: TestParserRequest):
format_strings = _extract_format_strings(content)
+ # ── JSON auto-extract path ──────────────────────────────────────────────
+ # SDL parsers that use `$=json{parse=json}$` (or any format containing
+ # `parse=json`) auto-extract every top-level JSON key as an attribute.
+ # The regex-based path can't model that — handle it explicitly so users
+ # can test JSON-shaped logs against JSON-mode parsers.
+ log_input = req.log_line.strip()
+ is_json_mode = any("parse=json" in f for f in format_strings) or log_input.startswith("{")
+ if is_json_mode:
+ import json as _json
+ # Support multi-line input (one JSON object per line, or a JSON array)
+ lines = [ln for ln in (l.strip() for l in log_input.splitlines()) if ln]
+ payloads: list[dict] = []
+ parse_errors: list[str] = []
+ # Single line: try direct parse; if it's a JSON array, expand.
+ if len(lines) == 1:
+ try:
+ obj = _json.loads(lines[0])
+ except Exception as e:
+ return {
+ "parser_name": req.parser_name,
+ "matched": False,
+ "message": f"Parser expects JSON but log line could not be parsed as JSON: {e}",
+ "fields": [],
+ }
+ if isinstance(obj, list):
+ payloads = [x for x in obj if isinstance(x, dict)]
+ elif isinstance(obj, dict):
+ payloads = [obj]
+ else:
+ return {
+ "parser_name": req.parser_name,
+ "matched": False,
+ "message": "Parser expects a JSON object (got scalar).",
+ "fields": [],
+ }
+ else:
+ # Multi-line: one JSON object per line (NDJSON)
+ for i, ln in enumerate(lines, 1):
+ try:
+ obj = _json.loads(ln)
+ if isinstance(obj, dict):
+ payloads.append(obj)
+ else:
+ parse_errors.append(f"line {i}: not a JSON object")
+ except Exception as e:
+ parse_errors.append(f"line {i}: {e}")
+
+ if not payloads:
+ return {
+ "parser_name": req.parser_name,
+ "matched": False,
+ "message": "No valid JSON objects found. " + " | ".join(parse_errors[:3]),
+ "fields": [],
+ }
+
+ # Use the first payload for the detail table; report totals.
+ payload = payloads[0]
+ extracted = _flatten_dict(payload)
+ # Apply lightweight rewrites if present (input/output/match/replace blocks).
+ # We only handle simple literal/regex matches with $0 or string replacements;
+ # this is best-effort, intended for quick visual verification.
+ rewrites_applied = []
+ rewrite_re = re.compile(
+ r'\{\s*input:\s*"([^"]+)"\s*,\s*output:\s*"([^"]+)"\s*,\s*match:\s*"((?:[^"\\]|\\.)*)"\s*,\s*replace:\s*"((?:[^"\\]|\\.)*)"\s*\}',
+ re.DOTALL,
+ )
+ derived: dict[str, str] = {}
+ for m in rewrite_re.finditer(content):
+ in_field, out_field, match_pat, replace_val = m.group(1), m.group(2), m.group(3), m.group(4)
+ src_val = extracted.get(in_field)
+ if src_val is None:
+ continue
+ try:
+ m2 = re.search(match_pat, str(src_val))
+ except re.error:
+ continue
+ if not m2:
+ continue
+ # SDL uses $0 for whole match, $1.. for groups. Translate to Python
+ # \g<0>, \g<1>, ... so re.sub doesn't read \0 as a null byte.
+ def _to_py_backref(s: str) -> str:
+ return re.sub(r"\$(\d+)", lambda mm: f"\\g<{mm.group(1)}>", s)
+ try:
+ val = re.sub(match_pat, _to_py_backref(replace_val), str(src_val), count=1)
+ except re.error:
+ val = replace_val
+ derived[out_field] = val
+ rewrites_applied.append({
+ "input": in_field, "input_value": src_val,
+ "output": out_field, "matched_on": match_pat, "result": val,
+ })
+
+ fields = (
+ [{"field": k, "value": v, "source": "json-extract"} for k, v in sorted(extracted.items())]
+ + [{"field": k, "value": v, "source": "rewrite"} for k, v in sorted(derived.items())]
+ )
+ return {
+ "parser_name": req.parser_name,
+ "matched": True,
+ "mode": "json",
+ "format_matched": "$=json{parse=json}$",
+ "fields": fields,
+ "rewrites_applied": rewrites_applied,
+ "extracted_count": len(extracted),
+ "derived_count": len(derived),
+ "payload_count": len(payloads),
+ "parse_errors": parse_errors,
+ "showing_payload": 1,
+ }
+
+ # ── Regex format-string path (original) ─────────────────────────────────
for fmt in format_strings:
try:
compiled, py_to_sdl = _sdl_format_to_regex(fmt)
@@ -221,6 +377,7 @@ async def test_parser(req: TestParserRequest):
return {
"parser_name": req.parser_name,
"matched": True,
+ "mode": "regex",
"format_matched": fmt,
"fields": fields,
}
diff --git a/frontend/index.html b/frontend/index.html
index 95e1f86..b5b1f46 100644
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -825,6 +825,7 @@ function renderQuality() {
+
@@ -1050,12 +1052,21 @@ async function qtLoadParsers() {
if (qsSel) qsSel.innerHTML = sourcePlaceholder + sourceOptions
if (qpSel) qpSel.innerHTML = sourcePlaceholder + sourceOptions
- // Populate parser dropdown
+ // Populate parser dropdown from /app/parsers/ directory (not from coverage map)
const qtSel = document.getElementById('qt-parser')
if (qtSel) {
- parserNames.forEach(n => {
- const o = document.createElement('option'); o.value = n; o.textContent = n; qtSel.appendChild(o)
- })
+ try {
+ const p = await apiGet('/api/quality/parsers')
+ qtSel.innerHTML = ''
+ ;(p.parsers || []).forEach(n => {
+ const o = document.createElement('option'); o.value = n; o.textContent = n; qtSel.appendChild(o)
+ })
+ if (!p.parsers || p.parsers.length === 0) {
+ qtSel.innerHTML = ''
+ }
+ } catch (err) {
+ qtSel.innerHTML = ''
+ }
}
} catch(e) {
// If no sources synced yet, fall back to empty state with hint
@@ -1079,26 +1090,54 @@ async function qtTest() {
if (!r.matched) {
document.getElementById('qt-result').innerHTML = `
- ⚠ No format pattern matched this log line.
-
The parser's format strings didn't produce a match. Check that the log sample matches the expected format, or that the parser has SDL format strings (some parsers use grok/dottedJson which aren't tested here).
+ ⚠ ${esc(r.message || 'No format pattern matched this log line.')}
+
The parser's format strings didn't produce a match. Check that the log sample matches the expected format, or that the parser uses grok/dottedJson which aren't tested here.