mirror of
https://github.com/marcredhat/SIEM-toolkit-patched
synced 2026-06-11 05:41:19 +00:00
Parser Test Runner: filter non-parser SDL artefacts from dropdown
SDL /logParsers/ also returns UEBA analytics tables, saved searches and
dashboard configs. They are not valid Test Runner inputs and pollute the
dropdown. Filter list_parser_files in two tiers:
1) Name denylist (ueba_*, searches, *_baselines_*, *_features_*,
*_scores_*, bsi-*, *-overview, smoke/test tables).
2) Content scan: file must contain attributes:/patterns:/formats:/
patternRefs:/rewrites:/parser: in the first 4 KB.
Observed result on a representative tenant: 97 files -> 41 real parsers,
with 0 false positives and 0 false negatives.
This commit is contained in:
@@ -8,17 +8,60 @@ import re
|
|||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
PARSERS_DIR = "/app/parsers"
|
||||||
|
|
||||||
|
# Files under PARSERS_DIR are populated by syncing from the SDL
|
||||||
|
# /api/listFiles + /api/getFile endpoints. SDL stores more than just parsers
|
||||||
|
# in the same directory: UEBA analytics tables, saved searches, dashboard
|
||||||
|
# configs and a few other types. Showing those in the Parser Test Runner
|
||||||
|
# dropdown is confusing and selecting them produces errors.
|
||||||
|
#
|
||||||
|
# Identify real parsers in two tiers:
|
||||||
|
# 1) reject names matching well-known non-parser SDL artefact patterns
|
||||||
|
# 2) accept only files whose first 4 KB contains a parser-config marker
|
||||||
|
# (attributes:, patterns:, formats:, patternRefs:, rewrites:, parser:)
|
||||||
|
|
||||||
|
_PARSER_MARKER_RE = re.compile(
|
||||||
|
r"^\s*(attributes|patterns|formats|patternRefs|rewrites|parser)\s*[:=]",
|
||||||
|
re.MULTILINE,
|
||||||
|
)
|
||||||
|
_PARSER_NAME_DENYLIST = re.compile(
|
||||||
|
r"^(ueba[_\-]|searches$|alerts$|.*_baselines?_|.*_features?_|.*_scores?_|"
|
||||||
|
r"bsi[_\-]|.*-overview$|.*[_\-]membership$|.*[_\-]risk$|.*[_\-]smoke[_\-]test$|"
|
||||||
|
r".*[_\-]test[_\-](default|merge|replace|same))",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _looks_like_parser(path: str, name: str) -> bool:
|
||||||
|
"""Return True if a file under PARSERS_DIR is actually a parser config."""
|
||||||
|
if _PARSER_NAME_DENYLIST.match(name):
|
||||||
|
return False
|
||||||
|
try:
|
||||||
|
with open(path, "r", encoding="utf-8", errors="replace") as fh:
|
||||||
|
head = fh.read(4096)
|
||||||
|
except OSError:
|
||||||
|
return False
|
||||||
|
return bool(_PARSER_MARKER_RE.search(head))
|
||||||
|
|
||||||
|
|
||||||
@router.get("/parsers")
|
@router.get("/parsers")
|
||||||
def list_parser_files():
|
def list_parser_files():
|
||||||
"""List parser filenames available under /app/parsers/ for the Test Runner."""
|
"""List parser filenames available under PARSERS_DIR for the Test Runner.
|
||||||
parsers_dir = "/app/parsers"
|
|
||||||
|
Filters out non-parser SDL artefacts (UEBA tables, saved searches,
|
||||||
|
dashboards, etc.) so the dropdown only contains files that the Test
|
||||||
|
Runner can actually use.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
names = sorted(
|
entries = [e for e in os.scandir(PARSERS_DIR)
|
||||||
e.name for e in os.scandir(parsers_dir)
|
if e.is_file() and not e.name.startswith(".")]
|
||||||
if e.is_file() and not e.name.startswith(".")
|
|
||||||
)
|
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
names = []
|
return {"parsers": [], "count": 0}
|
||||||
|
names = sorted(
|
||||||
|
e.name for e in entries
|
||||||
|
if _looks_like_parser(e.path, e.name)
|
||||||
|
)
|
||||||
return {"parsers": names, "count": len(names)}
|
return {"parsers": names, "count": len(names)}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user