mirror of
https://github.com/marcredhat/SIEM-toolkit-patched
synced 2026-06-08 12:33:51 +00:00
101 lines
3.6 KiB
Python
101 lines
3.6 KiB
Python
#!/usr/bin/env python3
|
|
"""Probe the SDL tenant to understand why Avelios Medical field-population shows 0%."""
|
|
import json, time, urllib.request, urllib.error
|
|
import os
|
|
|
|
def _load_sdl_cfg():
|
|
import json as _j, os as _o, sys as _s
|
|
here = _o.path.dirname(_o.path.abspath(__file__))
|
|
candidates = [
|
|
_o.environ.get("SDL_CONFIG"),
|
|
_o.path.join(here, "sdl_config.json"),
|
|
_o.path.join(here, "..", "sdl_config.json"),
|
|
]
|
|
for p in candidates:
|
|
if p and _o.path.exists(p):
|
|
with open(p) as fh:
|
|
return _j.load(fh)
|
|
_s.stderr.write(
|
|
"ERROR: no SDL config found. Set $SDL_CONFIG or create sdl_config.json "
|
|
"(see sdl_config.example.json)\n")
|
|
_s.exit(2)
|
|
|
|
|
|
CFG = _load_sdl_cfg()
|
|
BASE = CFG['base_url'].rstrip('/')
|
|
KEY = CFG['log_read_key']
|
|
END_MS = int(time.time() * 1000)
|
|
START_MS = END_MS - 24 * 3600 * 1000 # last 24h
|
|
|
|
|
|
def pq(query: str, max_count: int = 10) -> dict:
|
|
body = json.dumps({
|
|
"token": KEY, "query": query,
|
|
"startTime": START_MS, "endTime": END_MS,
|
|
"maxCount": max_count,
|
|
}).encode()
|
|
req = urllib.request.Request(BASE + '/api/powerQuery', data=body,
|
|
headers={"Content-Type": "application/json"})
|
|
try:
|
|
return json.loads(urllib.request.urlopen(req, timeout=30).read())
|
|
except urllib.error.HTTPError as e:
|
|
return {"_err": f"HTTP {e.code}: {e.read().decode()[:200]}"}
|
|
except Exception as e:
|
|
return {"_err": str(e)[:200]}
|
|
|
|
|
|
def show(label, d):
|
|
if "_err" in d:
|
|
print(f"[ERR] {label}: {d['_err']}"); return
|
|
cols = [c['name'] if isinstance(c, dict) else c for c in d.get('columns', [])]
|
|
vals = d.get('values', []) or d.get('matches', [])
|
|
print(f"[OK ] {label} cols={cols} rows={len(vals)}")
|
|
for v in vals[:8]:
|
|
print(f" {v}")
|
|
|
|
|
|
# 1. Distinct dataSource.name values containing 'velio'
|
|
print("=" * 70)
|
|
print("1. Source-name spellings containing 'velio'")
|
|
print("=" * 70)
|
|
show("by dataSource.name",
|
|
pq("| group n=count() by dataSource.name | sort -n | limit 50", max_count=50))
|
|
|
|
# 2. Try a few candidate names
|
|
print()
|
|
print("=" * 70)
|
|
print("2. Try filtering by candidate names")
|
|
print("=" * 70)
|
|
for cand in ["Avelios Medical", "Avelios-Medical", "Avelios-Medical-OCSF",
|
|
"avelios", "Avelios"]:
|
|
d = pq(f"| filter dataSource.name == '{cand}' | group n=count()", max_count=1)
|
|
n = (d.get('values') or [[None]])[0][0] if 'values' in d else d
|
|
print(f" {cand!r:<35} -> {n}")
|
|
for cand in ["Avelios Medical", "Avelios-Medical-OCSF", "avelios"]:
|
|
d = pq(f"| filter dataSource.name contains '{cand}' | group n=count()", max_count=1)
|
|
n = (d.get('values') or [[None]])[0][0] if 'values' in d else d
|
|
print(f" contains {cand!r:<25} -> {n}")
|
|
|
|
# 3. Sample one raw event to see what column names actually come back
|
|
print()
|
|
print("=" * 70)
|
|
print("3. Sample one event — what keys/columns are returned?")
|
|
print("=" * 70)
|
|
d = pq("| filter dataSource.name contains 'velio' | limit 1", max_count=1)
|
|
if "_err" in d:
|
|
print(" ", d["_err"])
|
|
else:
|
|
print(" columns:", [c['name'] if isinstance(c, dict) else c for c in d.get('columns', [])][:30])
|
|
print(" first row sample:", str((d.get('values') or [None])[0])[:400])
|
|
|
|
# 4. If we got columns, check which OCSF fields exist
|
|
print()
|
|
print("=" * 70)
|
|
print("4. Field presence in last 24h for Avelios (using columns command)")
|
|
print("=" * 70)
|
|
d = pq("| filter dataSource.name contains 'velio' | "
|
|
"columns dataSource.name, metadata.product.name, metadata.event_code, "
|
|
"actor.user.name, src_endpoint.ip, dst_endpoint.ip | limit 5",
|
|
max_count=5)
|
|
show("columns view", d)
|