mirror of
https://github.com/marcredhat/SIEM-toolkit-patched
synced 2026-06-10 13:21:17 +00:00
Sigma -> SentinelOne PowerQuery pipeline
End-to-end workflow that turns SigmaHQ rules into SDL Scheduled custom-detection rules:

1. SIEM-toolkit provides the coverage map to find what's thin -- a MITRE ATT&CK heatmap across all detection library rules, plus rule firing status (active vs. never-fired).
2. Pick Sigma rules (https://github.com/SigmaHQ/sigma) that target those tactics.
3. Convert the Sigma rules to PowerQuery with pysigma-backend-sentinelone-pq.
4. Smoke-test against your tenant's /api/powerQuery, then deploy via /web/api/v2.1/cloud-detection/rules as Scheduled PQ rules in Draft.
5. Re-running on a different tenant is just re-pointing the credentials -- the converted .pq bodies travel as-is.

Files:
  README_sigma_pipeline.md          full workflow doc
  recommend_sigma_imports.py        coverage-map reader -> rule shortlist
  probe_wel_schema.py               WEL parser field discovery
  convert_test_deploy_sigma.py      pick + convert + 3 variants + deploy
  fixup_rules_6_7.py                OriginalFileName pre-processor
  run_sigma_on_tenant.py            redeploy already-converted bodies
  verify_rule_exists_via_put.py     PUT-existence test (RBAC workaround)
  verify_deployed_sigma_rules.py    RBAC visibility diagnostic
  tenant_config.example.json        credentials template (gitignored real one)

Each converted rule emits three PowerQuery variants:
  <stem>.pq          faithful (S1 DV schema)
  <stem>.relaxed.pq  drops endpoint.os + event.type clauses
  <stem>.wel.pq      rewritten onto microsoft_windows_eventlog-latest

All scripts read credentials from tenant_config.json (or the SIEM_TOOLKIT_CONFIG env var), discover the target site_id at runtime, and persist deployed rule IDs to deployed_rule_ids.json so the verify scripts work without hardcoded IDs.
This commit is contained in:
@@ -0,0 +1,98 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
probe_wel_schema.py
|
||||
|
||||
Probe the tenant's Singularity Data Lake to discover what fields the
|
||||
`microsoft_windows_eventlog-latest` parser emits. Output guides the WEL
|
||||
mapping pipeline in convert_test_deploy_sigma.py.
|
||||
|
||||
Runs a series of read-only PowerQuery probes for the last 24 h. No state
|
||||
changes -- safe to re-run.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
import time
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
|
||||
# Directory holding this script; the default config file is looked up here.
HERE = pathlib.Path(__file__).resolve().parent

# Config location: the SIEM_TOOLKIT_CONFIG environment variable wins,
# otherwise fall back to tenant_config.json next to this script.
_CFG_PATH = os.environ.get("SIEM_TOOLKIT_CONFIG",
                           str(HERE / "tenant_config.json"))

# Load the config through a context manager so the file handle is closed
# deterministically, and decode as UTF-8 (the JSON interchange encoding)
# instead of whatever the platform locale happens to be.
with open(_CFG_PATH, encoding="utf-8") as _cfg_file:
    CFG = json.load(_cfg_file)

# Base URL with any trailing slash removed so "/api/..." can be appended.
BASE = CFG["SDL_XDR_URL"].rstrip("/")
# Token sent in the body of every PowerQuery request.
TOK = CFG["SDL_LOG_READ_KEY"]
|
||||
|
||||
|
||||
def pq(query: str, hours: int = 24) -> tuple[str, list, list[str]]:
    """Run one PowerQuery and summarise the outcome as a tuple.

    POSTs ``query`` to ``{BASE}/api/powerQuery`` together with the ``TOK``
    token and a time window covering the last ``hours`` hours. The window
    bounds are epoch milliseconds, sent as strings.

    Args:
        query: PowerQuery text to execute.
        hours: Size of the look-back window, ending at the current time.

    Returns:
        A ``(status, rows, columns)`` tuple:

        * success: ``("OK", rows, names)`` where ``rows`` is the response's
          ``"values"`` (or ``[]``) and ``names`` is the ``"name"`` of each
          entry in the response's ``"columns"``.
        * HTTP error: ``("HTTP<code>", [first 250 chars of the body], [])``.
        * any other failure while sending or parsing:
          ``(<exception class name>, [<exception message>], [])``.
    """
    end = int(time.time() * 1000)
    start = end - hours * 3600 * 1000
    payload = {
        "token": TOK,
        "query": query,
        "startTime": str(start),
        "endTime": str(end),
    }
    req = urllib.request.Request(
        f"{BASE}/api/powerQuery",
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        # Close the HTTP response explicitly; probes run back to back and
        # an unclosed response keeps its socket open until garbage collected.
        with urllib.request.urlopen(req, timeout=60) as resp:
            d = json.loads(resp.read())
        return ("OK", d.get("values") or [],
                [c.get("name") for c in (d.get("columns") or [])])
    except urllib.error.HTTPError as e:
        # Error pages are not guaranteed to be UTF-8; never let decoding
        # the diagnostic body raise out of the handler.
        return (f"HTTP{e.code}",
                [e.read().decode(errors="replace")[:250]], [])
    except Exception as e:
        # Deliberate best-effort: a failed probe is reported, not fatal.
        return (f"{type(e).__name__}", [str(e)], [])
|
||||
|
||||
|
||||
# Filter clause shared by every probe: restrict to records produced by the
# Windows Event Log parser.
_WEL = "parser.name='microsoft_windows_eventlog-latest'"

# (label, PowerQuery) pairs, executed in order by main().
PROBES: list[tuple[str, str]] = [
    (
        "WEL distribution by EventID",
        f"{_WEL} | group n=count() by EventID | sort -n | limit 20",
    ),
    (
        "WEL channel / provider distribution",
        f"{_WEL} | group n=count() by Channel | sort -n | limit 15",
    ),
    (
        "WEL ProviderName distribution",
        f"{_WEL} | group n=count() by ProviderName | sort -n | limit 15",
    ),
    (
        "WEL EID=4688 row sample (Security: process creation)",
        f"{_WEL} EventID=4688 | columns CommandLine, NewProcessName, "
        "ParentProcessName, SubjectUserName, ProcessId | limit 3",
    ),
    (
        "WEL EID=1 row sample (Sysmon: process creation)",
        f"{_WEL} EventID=1 | columns CommandLine, Image, ParentImage, "
        "User, ProcessGuid | limit 3",
    ),
    (
        "Probe alternate camelCase fields on the WEL parser",
        f"{_WEL} | columns commandLine, image, parentImage, eventId "
        "| limit 3",
    ),
    (
        "Probe nested process.* fields on the WEL parser",
        f"{_WEL} | columns process.cmdLine, process.image.path, "
        "process.parentImage.path, event.id | limit 3",
    ),
    (
        "EID=4688 count alone (volume sanity)",
        f"{_WEL} EventID=4688 | group n=count() | limit 1",
    ),
    (
        "EID=1 count alone",
        f"{_WEL} EventID=1 | group n=count() | limit 1",
    ),
    (
        "Any cmdline-bearing record sample (raw)",
        f"{_WEL} | columns rawMessage | limit 1",
    ),
]
|
||||
|
||||
|
||||
def main() -> int:
    """Run every entry in PROBES through pq() and print the results.

    For each probe this prints its label, the query (newlines flattened to
    spaces, clipped to 160 characters), the status and column names that
    pq() reported, and up to ten result rows (each clipped to 240
    characters).

    Returns:
        Always 0; a failed probe is shown through its status line only.
    """
    def clip(text: str, limit: int) -> str:
        # Keep at most `limit` characters and mark any cut with "...".
        marker = "..." if len(text) > limit else ""
        return f"{text[:limit]}{marker}"

    rule = "=" * 78
    print(f"\n{rule}\n WEL parser schema probe -- last 24 h\n "
          f"endpoint: {BASE}/api/powerQuery\n{rule}")

    for label, query in PROBES:
        status, rows, cols = pq(query)
        flattened = " ".join(query.split("\n"))
        print(f"\n--- {label} ---")
        print(f" query : {clip(flattened, 160)}")
        print(f" status: {status} cols: {cols}")
        for row in rows[:10]:
            print(f" {clip(str(row), 240)}")

    return 0
|
||||
|
||||
|
||||
# Script entry point: run the probes and pass main()'s return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
|
||||
Reference in New Issue
Block a user