Files
marcredhat-siem-toolkit-pat…/convert_test_deploy_sigma.py
T
marc 4df8e844e5 Sigma -> SentinelOne PowerQuery pipeline
End-to-end workflow that turns SigmaHQ rules into SDL Scheduled
custom-detection rules:

1. SIEM-toolkit provides the coverage map to find what's thin --
   MITRE ATT&CK heatmap across all detection library rules, rule
   firing status (active vs never-fired).
2. Pick Sigma rules (https://github.com/SigmaHQ/sigma) that target
   those tactics.
3. Convert the Sigma rules to PowerQuery with
   pysigma-backend-sentinelone-pq.
4. Smoke-test against your tenant's /api/powerQuery, deploy via
   /web/api/v2.1/cloud-detection/rules as Scheduled PQ rules in Draft.
5. Re-running on a different tenant is just re-pointing the
   credentials -- the converted .pq bodies travel as-is.

Files:
  README_sigma_pipeline.md       full workflow doc
  recommend_sigma_imports.py     coverage-map reader -> rule shortlist
  probe_wel_schema.py            WEL parser field discovery
  convert_test_deploy_sigma.py   pick + convert + 3 variants + deploy
  fixup_rules_6_7.py             OriginalFileName pre-processor
  run_sigma_on_tenant.py         redeploy already-converted bodies
  verify_rule_exists_via_put.py  PUT-existence test (RBAC workaround)
  verify_deployed_sigma_rules.py RBAC visibility diagnostic
  tenant_config.example.json     credentials template (gitignored real one)

Each converted rule emits three PowerQuery variants:
  <stem>.pq          faithful (S1 DV schema)
  <stem>.relaxed.pq  drops endpoint.os + event.type clauses
  <stem>.wel.pq      rewritten onto microsoft_windows_eventlog-latest

All scripts read credentials from tenant_config.json (or the
SIEM_TOOLKIT_CONFIG env var), discover the target site_id at runtime,
and persist deployed rule IDs to deployed_rule_ids.json so the verify
scripts work without hardcoded IDs.
2026-05-28 12:29:37 +02:00

407 lines
17 KiB
Python

#!/usr/bin/env python3
"""
convert_test_deploy_sigma.py -- Sigma -> PowerQuery -> SDL Scheduled Rule.
Master pipeline that addresses every TODO from the v3 review:
(a) Fixes rule #6 (netsh) by trying multiple candidate filenames AND by
catching the pipeline error so the loop continues. Fixes rule #7
(AdsiSearcher) by also searching rules/windows/powershell/.
(b) Adds a WEL-mapping post-processor that rewrites the S1 EDR/DV PQ
fields to the microsoft_windows_eventlog-latest parser schema so
the queries can fire against Windows Event Log telemetry.
(c) Deploys every PQ that passes the live /api/powerQuery smoke test
as an SDL Scheduled rule via the S1 Mgmt API (POST
/web/api/v2.1/cloud-detection/rules). Requires --deploy + a valid
S1_CONSOLE_API_TOKEN in tenant_config.json.
For each rule we emit THREE PowerQuery variants and smoke-test each:
<stem>.pq -- faithful Sigma -> S1-PQ conversion (DV schema)
<stem>.relaxed.pq -- faithful minus the endpoint.os and event.type
clauses (DV schema but null-os-tolerant)
<stem>.wel.pq -- field-mapped onto microsoft_windows_eventlog-
latest (CommandLine, Image, ParentImage, ...)
Usage:
python3 convert_test_deploy_sigma.py # convert + test only
python3 convert_test_deploy_sigma.py --deploy # also create SDL rules
"""
from __future__ import annotations
import argparse
import json
import os
import pathlib
import re
import subprocess
import time
import urllib.error
import urllib.request
from typing import Any
# Directory holding this script; tenant_config.json is looked up here by default.
HERE = pathlib.Path(__file__).resolve().parent
# Interpreter used to run the Sigma -> PowerQuery conversion in a subprocess.
VENV_PY = os.environ.get("SIGMA_VENV_PY", "/tmp/sigma_venv/bin/python3")
# GitHub CLI binary used to list the SigmaHQ repository tree.
GH = os.environ.get("GH_BIN", "gh")
# Where fetched .yml rules and generated .pq bodies are written.
OUT = pathlib.Path(os.environ.get("SIGMA_OUT_DIR", "/tmp/sigma_converted_v4"))
# parents=True so a nested SIGMA_OUT_DIR works on a fresh machine.
OUT.mkdir(parents=True, exist_ok=True)

_CFG_PATH = os.environ.get("SIEM_TOOLKIT_CONFIG",
                           str(HERE / "tenant_config.json"))
# Close the handle deterministically and do not depend on the locale encoding.
with open(_CFG_PATH, encoding="utf-8") as _cfg_file:
    CFG = json.load(_cfg_file)

SDL_BASE = CFG["SDL_XDR_URL"].rstrip("/")
SDL_KEY = CFG["SDL_LOG_READ_KEY"]
S1_CONS = CFG.get("S1_CONSOLE_URL", "").rstrip("/")
# NOTE(review): trailing "." characters are stripped from the token --
# presumably to tolerate a copy/paste artefact; confirm before changing.
S1_TOK = CFG.get("S1_CONSOLE_API_TOKEN", "").rstrip(".")

# Site id is discovered at runtime from /sites?limit=10 (the first site the
# API returns). Override with the SITE_ID env var if you have multiple sites
# and want a specific one.
SITE_ID = os.environ.get("SITE_ID", "")
SIGMA_RAW = "https://raw.githubusercontent.com/SigmaHQ/sigma/master"
# The ten detections to import. Each entry is
#   (tactic, technique label, filename keywords, search powershell/ too?)
# Keywords are tried in the order listed, so put the most specific first.
WANTED: list[tuple[str, str, list[str], bool]] = [
    (
        "Lateral Movement",
        "T1021.006 WinRM",
        ["winrm", "winrs"],
        False,
    ),
    (
        "Collection",
        "T1113 Screen Capture",
        ["screen_capture", "screencapture", "screenshot"],
        False,
    ),
    (
        "Collection",
        "T1115 Clipboard Data",
        ["clipboard"],
        False,
    ),
    (
        "Exfiltration",
        "T1560.001 Archive via RAR",
        ["winrar_compress", "winrar", "rar_compress"],
        False,
    ),
    (
        "Exfiltration",
        "T1567.002 Exfil via rclone",
        ["rclone"],
        False,
    ),
    (
        "Reconnaissance",
        "T1016 netsh port-fwd",
        [
            "netsh_allowed_ports",
            "netsh_port_proxy",
            "netsh_port_fwd",
            "netsh_fw",
            "netsh_portproxy",
        ],
        False,
    ),
    (
        "Discovery",
        "T1087.002 AdsiSearcher",
        ["adsisearcher", "adsi_searcher"],
        True,  # rule lives under rules/windows/powershell/
    ),
    (
        "Discovery",
        "T1087/T1482 SharpHound",
        ["sharphound", "bloodhound"],
        False,
    ),
    (
        "Credential Access",
        "T1003.001 Mimikatz cmdline",
        ["mimikatz_command_line", "mimikatz_cli", "mimikatz"],
        False,
    ),
    (
        "Credential Access",
        "T1003.001 ProcDump LSASS",
        ["procdump_lsass", "procdump", "comsvcs_lsass"],
        False,
    ),
]
# ============================================================ helpers ======
def gh_api(path: str) -> Any:
    """Run ``gh api <path>`` and return its stdout decoded as JSON.

    Raises RuntimeError, carrying at most 300 characters of stderr, when
    the command exits with a non-zero status.
    """
    proc = subprocess.run(
        [GH, "api", path],
        capture_output=True,
        text=True,
        timeout=60,
    )
    if proc.returncode == 0:
        return json.loads(proc.stdout)
    stderr_head = proc.stderr.strip()[:300]
    raise RuntimeError(f"gh api {path}: {stderr_head}")
def fetch(url: str) -> bytes:
    """Download *url* (30 s timeout) and return the raw response body."""
    request = urllib.request.Request(url)
    request.add_header("User-Agent", "siem-toolkit")
    with urllib.request.urlopen(request, timeout=30) as response:
        body = response.read()
    return body
def list_sigma_rules(allow_powershell: bool) -> list[str]:
    """Return the sorted paths of candidate ``.yml`` rules in SigmaHQ/sigma.

    The recursive git tree of ``master`` is listed through ``gh_api`` and
    filtered to blobs under ``rules/windows/process_creation/``; with
    *allow_powershell* set, ``rules/windows/powershell/`` is included too.
    """
    wanted_dirs = ("rules/windows/process_creation/",)
    if allow_powershell:
        wanted_dirs += ("rules/windows/powershell/",)

    listing = gh_api("repos/SigmaHQ/sigma/git/trees/master?recursive=1")
    matches = []
    for entry in listing.get("tree", []):
        if entry.get("type") != "blob":
            continue
        entry_path = entry.get("path", "")
        if entry_path.endswith(".yml") and entry_path.startswith(wanted_dirs):
            matches.append(entry_path)
    matches.sort()
    return matches
def pick(paths: list[str], keywords: list[str]) -> str | None:
for kw in keywords:
for p in paths:
if kw in pathlib.Path(p).stem.lower():
return p
return None
def convert(yaml_text: str) -> str:
    """Convert one Sigma rule (YAML text) to a SentinelOne PowerQuery body.

    The conversion runs in a child process under VENV_PY: the rule is piped
    in on stdin and the first query the backend produces is read back from
    stdout. Raises RuntimeError with the last stderr line (at most 300
    characters) when the child exits non-zero.
    """
    snippet = "".join((
        "import sys\n",
        "from sigma.rule import SigmaRule\n",
        "from sigma.backends.sentinelone_pq import SentinelOnePQBackend\n",
        "r = SigmaRule.from_yaml(sys.stdin.read())\n",
        "print(SentinelOnePQBackend().convert_rule(r)[0])\n",
    ))
    proc = subprocess.run(
        [VENV_PY, "-c", snippet],
        input=yaml_text,
        text=True,
        capture_output=True,
        timeout=90,
    )
    if proc.returncode == 0:
        return proc.stdout.strip()
    # The final traceback line is usually the most informative one.
    trace_lines = proc.stderr.strip().splitlines()
    reason = trace_lines[-1] if trace_lines else "(no stderr)"
    raise RuntimeError(reason[:300])
def _outer_parens_match(text: str) -> bool:
    """Return True when the leading "(" of *text* is closed by its final ")".

    Parentheses inside single- or double-quoted strings are ignored.
    Assumes a backslash escapes the next character inside a quoted string
    -- TODO confirm against the PowerQuery grammar. A mis-scan falls back
    to False, which only means the outer parentheses are kept.
    """
    if len(text) < 2 or text[0] != "(" or text[-1] != ")":
        return False
    depth = 0
    quote = ""
    escaped = False
    last = len(text) - 1
    for pos, ch in enumerate(text):
        if quote:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == quote:
                quote = ""
            continue
        if ch in "\"'":
            quote = ch
        elif ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
            if depth == 0:
                # The opening paren closes here; it wraps the whole
                # expression only if this is the last character.
                return pos == last
    return False


def relax(pq_body: str) -> str:
    """Strip endpoint.os and event.type filter clauses.

    Only clauses joined to a neighbour with ``and`` are removed. Afterwards
    one pair of parentheses enclosing the whole expression is dropped -- but
    only when the first "(" really pairs with the last ")". Blindly
    stripping them would turn ``(a or b) and (c or d)`` into the
    unbalanced ``a or b) and (c or d``.

    Returns the relaxed query with surrounding whitespace removed.
    """
    body = pq_body
    for field in (r'endpoint\.os', r'event\.type'):
        clause = field + r'\s*=\s*"[^"]*"'
        body = re.sub(clause + r'\s+and\s+', '', body)   # clause first
        body = re.sub(r'\s+and\s+' + clause, '', body)   # clause last
    body = body.strip()
    if _outer_parens_match(body):
        body = body[1:-1].strip()
    return body
# DV schema -> WEL parser schema (microsoft_windows_eventlog-latest).
# Sysmon (EID=1) and Security (EID=4688) channels use slightly different
# field names; the WEL parser exposes Sysmon-style Image/ParentImage AND
# Security-style NewProcessName/ParentProcessName. We rewrite onto the
# more permissive Sysmon names because they're closer to S1 DV.
# Each entry is (regex matching the DV field, WEL replacement field).
DV_TO_WEL = [
    (r'\btgt\.process\.cmdline\b', 'CommandLine'),
    (r'\btgt\.process\.image\.path\b', 'Image'),
    (r'\btgt\.process\.displayName\b', 'OriginalFileName'),
    (r'\btgt\.process\.publisher\b', 'Company'),
    (r'\bsrc\.process\.image\.path\b', 'ParentImage'),
    (r'\bsrc\.process\.cmdline\b', 'ParentCommandLine'),
    (r'\bsrc\.process\.user\.name\b', 'User'),
]


def wel_map(pq_body: str) -> str:
    """Rewrite a faithful DV-schema PQ body to query the
    microsoft_windows_eventlog-latest parser instead.

    Strategy:
      - replace tgt.process.* / src.process.* with WEL field names
        (see DV_TO_WEL)
      - replace `event.type="Process Creation"` with an EventID filter
      - replace `endpoint.os="windows"` with
        dataSource.name='Windows Event Logs'
      - drop `<prefix>.integrityLevel="..."` comparisons, which have no
        WEL counterpart

    No parser-name pin is prepended; the dataSource.name clause is the only
    source restriction added here.

    Returns the rewritten query with surrounding whitespace removed.
    """
    body = pq_body
    for pat, repl in DV_TO_WEL:
        body = re.sub(pat, repl, body)
    body = re.sub(r'event\.type\s*=\s*"Process Creation"',
                  "(EventID=4688 or EventID=1)", body)
    body = re.sub(r'endpoint\.os\s*=\s*"windows"',
                  "dataSource.name='Windows Event Logs'", body)
    # Drop any leftover DV-only field comparisons that didn't map (would
    # otherwise null-filter every row). Only one we've seen: integrityLevel.
    # The clause is removed together with ONE adjacent and/or connective.
    # A preceding "(" is deliberately left alone: consuming it would leave
    # its closing ")" dangling and unbalance the query.
    integrity = r'[\w.]+\.integrityLevel\s*=\s*"[^"]*"'
    body = re.sub(integrity + r'\s+(?:and|or)\s+', '', body)   # clause first
    body = re.sub(r'\s+(?:and|or)\s+' + integrity, '', body)   # clause last
    return body.strip()
def pq(query: str, hours: int = 24) -> tuple[int, str, int]:
    """Run *query* against the tenant's /api/powerQuery endpoint.

    The time window is the last *hours* hours, ending now.

    Returns ``(code, message, rows)``:
      - ``(200, "ok", n)`` on success, n being the length of the response's
        "values" list (0 when it is absent or empty);
      - ``(http_status, body[:250], 0)`` when the server answers with an
        HTTP error;
      - ``(0, "<ErrorType>: <detail>", 0)`` when no usable response was
        obtained (DNS/connection failure, timeout, non-JSON body), so one
        unreachable call does not abort the caller's loop.
    """
    end = int(time.time() * 1000)
    start = end - hours * 3600 * 1000
    payload = {"token": SDL_KEY, "query": query,
               "startTime": str(start), "endTime": str(end)}
    req = urllib.request.Request(
        f"{SDL_BASE}/api/powerQuery",
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"}, method="POST")
    try:
        with urllib.request.urlopen(req, timeout=60) as r:
            d = json.loads(r.read())
    except urllib.error.HTTPError as e:
        # Must come first: HTTPError is a subclass of URLError/OSError.
        return e.code, e.read().decode(errors="replace")[:250], 0
    except (OSError, ValueError) as e:
        # URLError and socket timeouts are OSErrors; a JSON decode failure
        # is a ValueError.
        return 0, f"{type(e).__name__}: {e}"[:250], 0
    values = d.get("values") if isinstance(d, dict) else None
    return 200, "ok", len(values or [])
def deploy_rule(name: str, description: str, pq_body: str) -> tuple[int, str]:
    """POST a Scheduled-PQ rule to S1 Mgmt API.

    The rule is created in Draft status, scoped to SITE_ID, with Medium
    severity, a 30-minute lookback, a 5-minute run interval and a
    "greater than 0" threshold.

    Returns ``(code, message)``:
      - ``(200, "created id=<id>")`` on success ("?" when the response
        carries no id or is not parseable JSON);
      - ``(http_status, body[:300])`` on an HTTP error response;
      - ``(0, reason)`` when the console URL/token is not configured or
        the request could not be completed (connection failure, timeout).
    """
    if not (S1_CONS and S1_TOK):
        return 0, "no S1_CONSOLE_URL or S1_CONSOLE_API_TOKEN in config"
    payload = {
        "data": {
            "name": name,
            "description": description,
            "severity": "Medium",
            "expirationMode": "Permanent",
            "queryType": "scheduled",
            "queryLang": "2.0",
            "status": "Draft",
            "treatAsThreat": "UNDEFINED",
            "networkQuarantine": False,
            "coolOffSettings": {"renotifyMinutes": 60},
            "scheduledParams": {
                "query": pq_body,
                "lookbackWindowMinutes": 30,
                "runIntervalMinutes": 5,
                "threshold": {"value": 0, "operator": "Greater"},
            },
        },
        "filter": {"siteIds": [SITE_ID]},
    }
    req = urllib.request.Request(
        f"{S1_CONS}/web/api/v2.1/cloud-detection/rules",
        data=json.dumps(payload).encode(), method="POST")
    req.add_header("Authorization", f"ApiToken {S1_TOK}")
    req.add_header("Content-Type", "application/json")
    req.add_header("Accept", "application/json")
    try:
        with urllib.request.urlopen(req, timeout=30) as r:
            raw = r.read()
    except urllib.error.HTTPError as e:
        # Must come first: HTTPError is a subclass of URLError/OSError.
        return e.code, e.read().decode(errors="replace")[:300]
    except OSError as e:
        # URLError and socket timeouts: the request did not complete.
        return 0, f"{type(e).__name__}: {e}"[:300]
    # The server accepted the POST; an unreadable body must not be reported
    # as a failed deployment, so fall back to the "?" id instead of raising.
    try:
        d = json.loads(raw)
    except ValueError:
        d = {}
    data = d.get("data") if isinstance(d, dict) else None
    rid = (data.get("id") if isinstance(data, dict) else None) or "?"
    return 200, f"created id={rid}"
# ============================================================ main =========
def _discover_site_id() -> str | None:
    """Return the id of the first site the Mgmt API lists, or None.

    Prints the discovered site on success and a FATAL line on any failure
    (HTTP error, unreachable console, unparseable response, no sites).
    """
    req = urllib.request.Request(f"{S1_CONS}/web/api/v2.1/sites?limit=10")
    req.add_header("Authorization", f"ApiToken {S1_TOK}")
    req.add_header("Accept", "application/json")
    try:
        with urllib.request.urlopen(req, timeout=20) as r:
            sites = ((json.loads(r.read()).get("data") or {})
                     .get("sites") or [])
    except urllib.error.HTTPError as e:
        print(f" FATAL site discovery: HTTP {e.code} "
              f"{e.read().decode(errors='replace')[:200]}")
        return None
    except (OSError, ValueError) as e:
        # Connection failure / timeout (OSError) or non-JSON body (ValueError).
        print(f" FATAL site discovery: {type(e).__name__}: {e}")
        return None
    if not sites:
        print(" FATAL: --deploy requested but no sites visible "
              "to this token.")
        return None
    site_id = sites[0]["id"]
    print(f" Site discovered : {site_id} "
          f"({sites[0].get('name')})\n")
    return site_id


def main() -> int:
    """Run the pick -> fetch -> convert -> smoke-test (-> deploy) pipeline.

    For every entry in WANTED a Sigma rule is picked by filename keyword,
    downloaded, converted to PowerQuery, written to OUT in three variants
    (faithful, relaxed, WEL) and smoke-tested. With ``--deploy`` the
    faithful variant is also created as a Draft scheduled rule when its
    smoke test returned HTTP 200, and the created rule ids are written to
    deployed_rule_ids.json next to this script.

    Returns the process exit status: 0 on completion, 1 when site
    discovery or the rule listing fails.
    """
    global SITE_ID

    ap = argparse.ArgumentParser()
    ap.add_argument("--deploy", action="store_true",
                    help="Also create each valid PQ as an SDL Scheduled rule.")
    args = ap.parse_args()
    total = len(WANTED)

    print(f"\n{'='*78}\n Sigma -> PowerQuery (faithful + relaxed + WEL) "
          f"-> SDL rule\n{'='*78}\n")
    print(" Backend : pysigma-backend-sentinelone-pq")
    print(f" Tenant SDL : {SDL_BASE}")
    print(f" Tenant Mgmt API : {S1_CONS}")
    print(f" Deploy rules : {'YES' if args.deploy else 'no (use --deploy)'}")
    print(f" Output : {OUT}\n")

    # Site-id auto-discovery (only needed for --deploy).
    if args.deploy and not SITE_ID:
        discovered = _discover_site_id()
        if discovered is None:
            return 1
        SITE_ID = discovered

    # List both candidate path sets up front so the loop below does no
    # listing of its own.
    print("--- listing sigmahq/sigma rule paths via gh api ---")
    try:
        pc_only = list_sigma_rules(allow_powershell=False)
        pc_and_pwsh = list_sigma_rules(allow_powershell=True)
    except (RuntimeError, OSError, ValueError,
            subprocess.SubprocessError) as e:
        # gh missing, gh failing, timing out, or returning non-JSON.
        print(f" FATAL listing Sigma rules: {type(e).__name__}: {e}")
        return 1
    print(f" process_creation/ : {len(pc_only)} rules")
    print(f" process_creation/ + powershell/ : {len(pc_and_pwsh)} rules\n")

    summary: list[dict[str, Any]] = []
    for i, (tactic, tech, kws, allow_pwsh) in enumerate(WANTED, 1):
        paths = pc_and_pwsh if allow_pwsh else pc_only
        rec: dict[str, Any] = {"i": i, "tactic": tactic, "tech": tech}
        print(f"[{i:02d}/{total}] {tactic} :: {tech}")

        path = pick(paths, kws)
        if not path:
            print(f" PICK : no match for {kws}\n")
            rec["status"] = "no_match"
            summary.append(rec)
            continue
        print(f" PICK : {path}")
        rec["path"] = path

        try:
            raw = fetch(f"{SIGMA_RAW}/{path}").decode("utf-8")
        except Exception as e:  # keep going: one bad rule must not stop the run
            print(f" FETCH : FAIL {e}\n")
            rec["status"] = "fetch_failed"
            summary.append(rec)
            continue
        stem = pathlib.Path(path).stem
        # The YAML was decoded as UTF-8; write it back the same way rather
        # than in the locale encoding.
        (OUT / f"{stem}.yml").write_text(raw, encoding="utf-8")

        try:
            pq_body = convert(raw)
        except Exception as e:  # keep going: one bad rule must not stop the run
            print(f" CONVERT : FAIL {e}\n")
            rec["status"] = "convert_failed"
            rec["err"] = str(e)
            summary.append(rec)
            continue
        relaxed_body = relax(pq_body)
        wel_body = wel_map(pq_body)
        (OUT / f"{stem}.pq").write_text(pq_body, encoding="utf-8")
        (OUT / f"{stem}.relaxed.pq").write_text(relaxed_body, encoding="utf-8")
        (OUT / f"{stem}.wel.pq").write_text(wel_body, encoding="utf-8")
        rec["pq_chars"] = len(pq_body)
        rec["relaxed_chars"] = len(relaxed_body)
        rec["wel_chars"] = len(wel_body)
        print(f" CONVERT : OK faithful={len(pq_body)}c "
              f"relaxed={len(relaxed_body)}c wel={len(wel_body)}c")

        # smoke test all three
        c1, _, r1 = pq(pq_body)
        c2, _, r2 = pq(relaxed_body)
        c3, e3, r3 = pq(wel_body)
        rec.update({"fa_http": c1, "fa_rows": r1,
                    "re_http": c2, "re_rows": r2,
                    "wel_http": c3, "wel_rows": r3,
                    "wel_err": e3 if c3 != 200 else ""})
        print(f" TEST FA : HTTP {c1} rows={r1}")
        print(f" TEST RE : HTTP {c2} rows={r2}")
        print(f" TEST WEL: HTTP {c3} rows={r3}"
              f"{' err=' + e3[:120] if c3 != 200 else ''}")
        accepted = (c1 == 200) or (c3 == 200)
        rec["status"] = ("FIRES" if (r1 > 0 or r2 > 0 or r3 > 0)
                         else "valid_no_data" if accepted
                         else "PQ_ERROR")

        # deploy faithful (only) if requested + valid
        if args.deploy and c1 == 200:
            rule_name = (f"[Sigma->PQ] {tactic} / {tech} "
                         f"({stem})")[:128]
            desc = (f"Auto-converted from SigmaHQ/sigma "
                    f"{path} via pysigma-backend-sentinelone-pq. "
                    f"Faithful S1 DV schema.")
            dc, dmsg = deploy_rule(rule_name, desc, pq_body)
            rec["deploy_http"] = dc
            rec["deploy_msg"] = dmsg
            if dc == 200:
                # dmsg shape is "created id=<id>"; extract just the id
                rec["rule_id"] = dmsg.split("id=")[-1].strip()
                rec["pq_file"] = f"{stem}.pq"
            print(f" DEPLOY : HTTP {dc} {dmsg[:160]}")
        print()
        summary.append(rec)

    # --- summary ---
    print(f"{'='*78}\n SUMMARY (rows = events matched in last 24 h)\n"
          f"{'='*78}")
    hdr = (f" {'#':>3} {'tactic':<18}{'technique':<26}"
           f"{'fa':>5}{'re':>5}{'wel':>5} status")
    print(hdr)
    print(" " + "-" * (len(hdr) - 2))
    for s in summary:
        print(f" {s['i']:>3} {s['tactic']:<18}{s['tech']:<26}"
              f"{s.get('fa_rows','-')!s:>5}{s.get('re_rows','-')!s:>5}"
              f"{s.get('wel_rows','-')!s:>5} {s.get('status','-')}")
    fires = sum(1 for s in summary
                if any(s.get(k, 0) > 0
                       for k in ('fa_rows', 're_rows', 'wel_rows')))
    valid_count = sum(1 for s in summary
                      if s.get('status') in ('valid_no_data', 'FIRES'))
    failed = sum(1 for s in summary
                 if s.get('status') in ('no_match', 'fetch_failed',
                                        'convert_failed', 'PQ_ERROR'))
    print(f"\n Rules with any matches : {fires}/{total}")
    print(f" Syntactically valid : {valid_count}/{total}")
    print(f" Failed / not matched : {failed}/{total}")
    if args.deploy:
        deployed = [s for s in summary if s.get('deploy_http') == 200]
        print(f" SDL rules created : {len(deployed)}/{total}")
        # Persist the (rule_id, pq_file) map for verify scripts.
        ids_file = HERE / "deployed_rule_ids.json"
        ids_file.write_text(json.dumps(
            {"tenant": S1_CONS,
             "site_id": SITE_ID,
             "rules": [{"rule_id": s["rule_id"],
                        "pq_file": s["pq_file"],
                        "tactic": s["tactic"],
                        "tech": s["tech"]}
                       for s in deployed]}, indent=2), encoding="utf-8")
        print(f" Deployed IDs : {ids_file}")
    print(f" Artefacts : {OUT}/")
    print("\n Next steps:")
    print(f" - inspect {OUT}/*.wel.pq for WEL variants")
    if not args.deploy:
        # Only suggest deploying when this run did not already do so.
        print(" - re-run with --deploy to create SDL Scheduled rules")
    print(" - verify with verify_rule_exists_via_put.py")
    print(f" - check console UI: {S1_CONS}/#/cloud-detection/rules\n")
    return 0
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status.
    exit_status = main()
    raise SystemExit(exit_status)