diff --git a/backend/routers/ingest.py b/backend/routers/ingest.py index 5b03f9a..a665731 100644 --- a/backend/routers/ingest.py +++ b/backend/routers/ingest.py @@ -92,12 +92,15 @@ async def simulate_filter(rule: FilterRule): clauses = [] if rule.source: - clauses.append(f'src.name = "{rule.source}"') + clauses.append(f"dataSource.name=='{rule.source}'") if rule.event_type: - clauses.append(f'event.type = "{rule.event_type}"') + clauses.append(f"event.type=='{rule.event_type}'") - filter_expr = " AND ".join(clauses) if clauses else "true" - query = f"| filter {filter_expr} | count() as events" + if clauses: + filter_expr = " and ".join(clauses) + query = f"| filter {filter_expr} | group events=count()" + else: + query = "| group events=count()" try: result = await s1_client.run_powerquery(query, from_dt, to_dt) diff --git a/backend/routers/quality.py b/backend/routers/quality.py index 7b266b7..3e3f8ae 100644 --- a/backend/routers/quality.py +++ b/backend/routers/quality.py @@ -2,11 +2,26 @@ from fastapi import APIRouter, HTTPException from pydantic import BaseModel from datetime import datetime, timedelta from services import s1_client +import os import re router = APIRouter() +@router.get("/parsers") +def list_parser_files(): + """List parser filenames available under /app/parsers/ for the Test Runner.""" + parsers_dir = "/app/parsers" + try: + names = sorted( + e.name for e in os.scandir(parsers_dir) + if e.is_file() and not e.name.startswith(".") + ) + except FileNotFoundError: + names = [] + return {"parsers": names, "count": len(names)} + + def _date_range_hours(hours: int) -> tuple[str, str]: now = datetime.utcnow() return ( @@ -52,11 +67,41 @@ class TestParserRequest(BaseModel): # Helpers # --------------------------------------------------------------------------- +def _flatten_dict(d: dict, prefix: str = "", out: dict | None = None) -> dict: + """Recursively flatten a nested dict into dotted keys.""" + if out is None: + out = {} + if not isinstance(d, dict): + return out + for k, v in d.items(): + key = f"{prefix}.{k}" if prefix else k + if isinstance(v, dict): + _flatten_dict(v, key, out) + else: + out[key] = v + return out + + def _flatten_event(event: dict) -> dict: - """Return a flat field→value dict from a PowerQuery result row.""" - if isinstance(event, dict): - return {k: v for k, v in event.items()} - return {} + """Return a flat field→value dict from a PowerQuery result row. + + If the row only carries a JSON-stringified payload in `message` (i.e. the + parser wasn't applied at query time), parse and flatten it inline so the + UI can measure field population accurately. The original raw `message` + is preserved under its own key. + """ + if not isinstance(event, dict): + return {} + flat = dict(event) + msg = flat.get("message") + if isinstance(msg, str) and msg.startswith("{") and msg.endswith("}"): + try: + parsed = __import__("json").loads(msg) + if isinstance(parsed, dict): + flat.update(_flatten_dict(parsed)) + except Exception: + pass + return flat def _extract_format_strings(content: str) -> list[str]: @@ -204,6 +249,117 @@ async def test_parser(req: TestParserRequest): format_strings = _extract_format_strings(content) + # ── JSON auto-extract path ────────────────────────────────────────────── + # SDL parsers that use `$=json{parse=json}$` (or any format containing + # `parse=json`) auto-extract every top-level JSON key as an attribute. + # The regex-based path can't model that — handle it explicitly so users + # can test JSON-shaped logs against JSON-mode parsers. + log_input = req.log_line.strip() + is_json_mode = any("parse=json" in f for f in format_strings) or log_input.startswith("{") + if is_json_mode: + import json as _json + # Support multi-line input (one JSON object per line, or a JSON array) + lines = [ln for ln in (l.strip() for l in log_input.splitlines()) if ln] + payloads: list[dict] = [] + parse_errors: list[str] = [] + # Single line: try direct parse; if it's a JSON array, expand. + if len(lines) == 1: + try: + obj = _json.loads(lines[0]) + except Exception as e: + return { + "parser_name": req.parser_name, + "matched": False, + "message": f"Parser expects JSON but log line could not be parsed as JSON: {e}", + "fields": [], + } + if isinstance(obj, list): + payloads = [x for x in obj if isinstance(x, dict)] + elif isinstance(obj, dict): + payloads = [obj] + else: + return { + "parser_name": req.parser_name, + "matched": False, + "message": "Parser expects a JSON object (got scalar).", + "fields": [], + } + else: + # Multi-line: one JSON object per line (NDJSON) + for i, ln in enumerate(lines, 1): + try: + obj = _json.loads(ln) + if isinstance(obj, dict): + payloads.append(obj) + else: + parse_errors.append(f"line {i}: not a JSON object") + except Exception as e: + parse_errors.append(f"line {i}: {e}") + + if not payloads: + return { + "parser_name": req.parser_name, + "matched": False, + "message": "No valid JSON objects found. " + " | ".join(parse_errors[:3]), + "fields": [], + } + + # Use the first payload for the detail table; report totals. + payload = payloads[0] + extracted = _flatten_dict(payload) + # Apply lightweight rewrites if present (input/output/match/replace blocks). + # We only handle simple literal/regex matches with $0 or string replacements; + # this is best-effort, intended for quick visual verification. + rewrites_applied = [] + rewrite_re = re.compile( + r'\{\s*input:\s*"([^"]+)"\s*,\s*output:\s*"([^"]+)"\s*,\s*match:\s*"((?:[^"\\]|\\.)*)"\s*,\s*replace:\s*"((?:[^"\\]|\\.)*)"\s*\}', + re.DOTALL, + ) + derived: dict[str, str] = {} + for m in rewrite_re.finditer(content): + in_field, out_field, match_pat, replace_val = m.group(1), m.group(2), m.group(3), m.group(4) + src_val = extracted.get(in_field) + if src_val is None: + continue + try: + m2 = re.search(match_pat, str(src_val)) + except re.error: + continue + if not m2: + continue + # SDL uses $0 for whole match, $1.. for groups. Translate to Python + # \g<0>, \g<1>, ... so re.sub doesn't read \0 as a null byte. + def _to_py_backref(s: str) -> str: + return re.sub(r"\$(\d+)", lambda mm: f"\\g<{mm.group(1)}>", s) + try: + val = re.sub(match_pat, _to_py_backref(replace_val), str(src_val), count=1) + except re.error: + val = replace_val + derived[out_field] = val + rewrites_applied.append({ + "input": in_field, "input_value": src_val, + "output": out_field, "matched_on": match_pat, "result": val, + }) + + fields = ( + [{"field": k, "value": v, "source": "json-extract"} for k, v in sorted(extracted.items())] + + [{"field": k, "value": v, "source": "rewrite"} for k, v in sorted(derived.items())] + ) + return { + "parser_name": req.parser_name, + "matched": True, + "mode": "json", + "format_matched": "$=json{parse=json}$", + "fields": fields, + "rewrites_applied": rewrites_applied, + "extracted_count": len(extracted), + "derived_count": len(derived), + "payload_count": len(payloads), + "parse_errors": parse_errors, + "showing_payload": 1, + } + + # ── Regex format-string path (original) ───────────────────────────────── for fmt in format_strings: try: compiled, py_to_sdl = _sdl_format_to_regex(fmt) @@ -221,6 +377,7 @@ async def test_parser(req: TestParserRequest): return { "parser_name": req.parser_name, "matched": True, + "mode": "regex", "format_matched": fmt, "fields": fields, } diff --git a/frontend/index.html b/frontend/index.html index 6a4723e..f4c5ebb 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -696,6 +696,7 @@ function renderQuality() { + @@ -911,12 +913,21 @@ async function qtLoadParsers() { if (qsSel) qsSel.innerHTML = sourcePlaceholder + sourceOptions if (qpSel) qpSel.innerHTML = sourcePlaceholder + sourceOptions - // Populate parser dropdown + // Populate parser dropdown from /app/parsers/ directory (not from coverage map) const qtSel = document.getElementById('qt-parser') if (qtSel) { - parserNames.forEach(n => { - const o = document.createElement('option'); o.value = n; o.textContent = n; qtSel.appendChild(o) - }) + try { + const p = await apiGet('/api/quality/parsers') + qtSel.innerHTML = '' + ;(p.parsers || []).forEach(n => { + const o = document.createElement('option'); o.value = n; o.textContent = n; qtSel.appendChild(o) + }) + if (!p.parsers || p.parsers.length === 0) { + qtSel.innerHTML = '' + } + } catch (err) { + qtSel.innerHTML = '' + } } } catch(e) { // If no sources synced yet, fall back to empty state with hint @@ -940,26 +951,54 @@ async function qtTest() { if (!r.matched) { document.getElementById('qt-result').innerHTML = `
- ⚠ No format pattern matched this log line. -

The parser's format strings didn't produce a match. Check that the log sample matches the expected format, or that the parser has SDL format strings (some parsers use grok/dottedJson which aren't tested here).

+ ⚠ ${esc(r.message || 'No format pattern matched this log line.')} +

The parser's format strings didn't produce a match. Check that the log sample matches the expected format, or that the parser uses grok/dottedJson which aren't tested here.

` return } - const rows = r.fields.map(f => ` + const extracts = (r.fields || []).filter(f => f.source !== 'rewrite') + const rewrites = (r.fields || []).filter(f => f.source === 'rewrite') + const rowsExtract = extracts.map(f => ` ${esc(f.field)} ${esc(String(f.value))} `).join('') + const rowsRewrite = rewrites.map(f => ` + ${esc(f.field)} + ${esc(String(f.value))} + `).join('') + const modeBadge = r.mode === 'json' + ? 'JSON auto-extract' + : 'regex format' + const counts = r.mode === 'json' + ? `${r.extracted_count} extracted · ${r.derived_count} rewritten` + + (r.payload_count > 1 ? ` · showing payload ${r.showing_payload}/${r.payload_count}` : '') + + `` : '' + const parseWarn = (r.parse_errors && r.parse_errors.length) + ? `
+ ${r.parse_errors.length} line(s) skipped: ${r.parse_errors.slice(0,3).map(esc).join(' | ')}${r.parse_errors.length>3?' …':''} +
` : '' document.getElementById('qt-result').innerHTML = `
- Matched format: ${esc(r.format_matched)} + Matched format: ${esc(r.format_matched)} ${modeBadge} +
${counts}
+ ${parseWarn}
+ + + + + + ${rowsExtract} +
Extracted FieldValue
+ ${rewrites.length ? ` +

Derived (rewrites applied — ${rewrites.length})

- - + + - ${rows} -
FieldExtracted ValueOutput FieldValue
` + ${rowsRewrite} + ` : ''}` } catch(e) { document.getElementById('qt-result').innerHTML = errBox(e.message) } finally { setBtn('btn-qt', false, 'Test') }