#!/usr/bin/env python3
"""
convert_test_deploy_sigma.py -- Sigma -> PowerQuery -> SDL Scheduled Rule.

Master pipeline that addresses every TODO from the v3 review:

(a) Fixes rule #6 (netsh) by trying multiple candidate filenames AND by
    catching the pipeline error so the loop continues.
    Fixes rule #7 (AdsiSearcher) by also searching
    rules/windows/powershell/.
(b) Adds a WEL-mapping post-processor that rewrites the S1 EDR/DV PQ
    fields to the microsoft_windows_eventlog-latest parser schema so the
    queries can fire against Windows Event Log telemetry.
(c) Deploys every PQ that passes the live /api/powerQuery smoke test as
    an SDL Scheduled rule via the S1 Mgmt API
    (POST /web/api/v2.1/cloud-detection/rules). Requires --deploy + a
    valid S1_CONSOLE_API_TOKEN in the tenant config (tenant_config.json
    next to this script, or the file named by $SIEM_TOOLKIT_CONFIG).

For each rule we emit THREE PowerQuery variants and smoke-test each:
    .pq          -- faithful Sigma -> S1-PQ conversion (DV schema)
    .relaxed.pq  -- faithful minus the endpoint.os and event.type
                    clauses (DV schema but null-os-tolerant)
    .wel.pq      -- field-mapped onto microsoft_windows_eventlog-latest
                    (CommandLine, Image, ParentImage, ...)

Usage:
    python3 convert_test_deploy_sigma.py            # convert + test only
    python3 convert_test_deploy_sigma.py --deploy   # also create SDL rules
"""
from __future__ import annotations

import argparse
import functools
import json
import os
import pathlib
import re
import subprocess
import time
import urllib.error
import urllib.request
from typing import Any

HERE = pathlib.Path(__file__).resolve().parent
VENV_PY = os.environ.get("SIGMA_VENV_PY", "/tmp/sigma_venv/bin/python3")
GH = os.environ.get("GH_BIN", "gh")
OUT = pathlib.Path(os.environ.get("SIGMA_OUT_DIR", "/tmp/sigma_converted_v4"))
# parents=True so a nested SIGMA_OUT_DIR override does not fail at import.
OUT.mkdir(parents=True, exist_ok=True)

_CFG_PATH = os.environ.get("SIEM_TOOLKIT_CONFIG",
                           str(HERE / "tenant_config.json"))
with open(_CFG_PATH, encoding="utf-8") as _cfg_file:
    CFG = json.load(_cfg_file)
SDL_BASE = CFG["SDL_XDR_URL"].rstrip("/")
SDL_KEY = CFG["SDL_LOG_READ_KEY"]
S1_CONS = CFG.get("S1_CONSOLE_URL", "").rstrip("/")
# NOTE(review): trailing "." characters are stripped from the token --
# presumably a copy/paste guard; confirm before changing.
S1_TOK = CFG.get("S1_CONSOLE_API_TOKEN", "").rstrip(".")

# Site id is discovered at runtime from /sites?limit=10 (the first site the
# API returns). Override with the SITE_ID env var if you have multiple sites
# and want a specific one.
SITE_ID = os.environ.get("SITE_ID", "")

SIGMA_RAW = "https://raw.githubusercontent.com/SigmaHQ/sigma/master"

# 10 desired (tactic, technique, keyword_list, allow_powershell_folder)
WANTED: list[tuple[str, str, list[str], bool]] = [
    ("Lateral Movement", "T1021.006 WinRM",
     ["winrm", "winrs"], False),
    ("Collection", "T1113 Screen Capture",
     ["screen_capture", "screencapture", "screenshot"], False),
    ("Collection", "T1115 Clipboard Data",
     ["clipboard"], False),
    ("Exfiltration", "T1560.001 Archive via RAR",
     ["winrar_compress", "winrar", "rar_compress"], False),
    ("Exfiltration", "T1567.002 Exfil via rclone",
     ["rclone"], False),
    ("Reconnaissance", "T1016 netsh port-fwd",
     ["netsh_allowed_ports", "netsh_port_proxy", "netsh_port_fwd",
      "netsh_fw", "netsh_portproxy"], False),
    ("Discovery", "T1087.002 AdsiSearcher",
     ["adsisearcher", "adsi_searcher"], True),  # in powershell/
    ("Discovery", "T1087/T1482 SharpHound",
     ["sharphound", "bloodhound"], False),
    ("Credential Access", "T1003.001 Mimikatz cmdline",
     ["mimikatz_command_line", "mimikatz_cli", "mimikatz"], False),
    ("Credential Access", "T1003.001 ProcDump LSASS",
     ["procdump_lsass", "procdump", "comsvcs_lsass"], False),
]


# ============================================================ helpers ======
def gh_api(path: str) -> Any:
    """Run ``gh api PATH`` and return the decoded JSON response.

    Raises:
        RuntimeError: the gh process exited non-zero (message carries up
            to 300 characters of its stderr).
    """
    r = subprocess.run([GH, "api", path], capture_output=True, text=True,
                       timeout=60)
    if r.returncode != 0:
        raise RuntimeError(f"gh api {path}: {r.stderr.strip()[:300]}")
    return json.loads(r.stdout)


def fetch(url: str) -> bytes:
    """GET *url* (30 s timeout) and return the raw response body."""
    req = urllib.request.Request(url, headers={"User-Agent": "siem-toolkit"})
    with urllib.request.urlopen(req, timeout=30) as r:
        return r.read()


@functools.lru_cache(maxsize=1)
def _sigma_tree_entries() -> tuple[dict[str, Any], ...]:
    """Fetch the recursive SigmaHQ/sigma git tree once per process."""
    tree = gh_api("repos/SigmaHQ/sigma/git/trees/master?recursive=1")
    return tuple(tree.get("tree", []))


def list_sigma_rules(allow_powershell: bool) -> list[str]:
    """Return the sorted ``.yml`` rule paths under the wanted rule folders.

    Always includes rules/windows/process_creation/; adds
    rules/windows/powershell/ when *allow_powershell* is true. The tree
    listing is cached, so repeated calls cost a single ``gh api`` request.
    """
    prefixes = ["rules/windows/process_creation/"]
    if allow_powershell:
        prefixes.append("rules/windows/powershell/")
    wanted = tuple(prefixes)
    blob_paths = (e.get("path", "") for e in _sigma_tree_entries()
                  if e.get("type") == "blob")
    return sorted(p for p in blob_paths
                  if p.endswith(".yml") and p.startswith(wanted))


def pick(paths: list[str], keywords: list[str]) -> str | None:
    """Return the first path whose lower-cased stem contains a keyword.

    Keywords are tried in order, so an earlier keyword wins over an earlier
    path. Returns None when nothing matches.
    """
    for kw in keywords:
        for p in paths:
            if kw in pathlib.Path(p).stem.lower():
                return p
    return None


def convert(yaml_text: str) -> str:
    """Convert one Sigma rule (YAML text) into a PowerQuery string.

    The conversion runs in the VENV_PY interpreter as a subprocess, with
    the rule piped in on stdin.

    Raises:
        RuntimeError: the subprocess exited non-zero; the message is the
            last stderr line, truncated to 300 characters.
    """
    code = (
        "import sys\n"
        "from sigma.rule import SigmaRule\n"
        "from sigma.backends.sentinelone_pq import SentinelOnePQBackend\n"
        "r = SigmaRule.from_yaml(sys.stdin.read())\n"
        "print(SentinelOnePQBackend().convert_rule(r)[0])\n")
    res = subprocess.run([VENV_PY, "-c", code], input=yaml_text, text=True,
                         capture_output=True, timeout=90)
    if res.returncode != 0:
        # last line of the trace is usually the most informative
        err = res.stderr.strip().splitlines()
        msg = err[-1] if err else "(no stderr)"
        raise RuntimeError(msg[:300])
    return res.stdout.strip()


def _strip_enclosing_parens(expr: str) -> str:
    """Drop one outer pair of parentheses only if it encloses all of *expr*.

    Parentheses inside double-quoted strings (with backslash escapes) are
    ignored. Anything unbalanced is returned unchanged.
    """
    if not (expr.startswith("(") and expr.endswith(")")):
        return expr
    depth = 0
    in_string = False
    escaped = False
    last = len(expr) - 1
    for idx, ch in enumerate(expr):
        if in_string:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
            continue
        if ch == '"':
            in_string = True
        elif ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
            # The leading "(" closed before the end, e.g. "(a) or (b)":
            # the first and last parens are not a pair, so keep them.
            if depth <= 0 and idx != last:
                return expr
    if depth != 0 or in_string:
        return expr
    return expr[1:-1].strip()


def relax(pq_body: str) -> str:
    """Strip endpoint.os and event.type filter clauses.

    Each clause is removed together with its adjoining ``and``, whether it
    leads or trails. One pair of outer parentheses is then dropped, but
    only when that pair really wraps the whole expression.
    """
    body = pq_body
    body = re.sub(r'endpoint\.os\s*=\s*"[^"]*"\s+and\s+', '', body)
    body = re.sub(r'\s+and\s+endpoint\.os\s*=\s*"[^"]*"', '', body)
    body = re.sub(r'event\.type\s*=\s*"[^"]*"\s+and\s+', '', body)
    body = re.sub(r'\s+and\s+event\.type\s*=\s*"[^"]*"', '', body)
    return _strip_enclosing_parens(body.strip()).strip()


# DV schema -> WEL parser schema (microsoft_windows_eventlog-latest).
# Sysmon (EID=1) and Security (EID=4688) channels use slightly different
# field names; the WEL parser exposes Sysmon-style Image/ParentImage AND
# Security-style NewProcessName/ParentProcessName. We rewrite onto the
# more permissive Sysmon names because they're closer to S1 DV.
DV_TO_WEL = [
    (r'\btgt\.process\.cmdline\b', 'CommandLine'),
    (r'\btgt\.process\.image\.path\b', 'Image'),
    (r'\btgt\.process\.displayName\b', 'OriginalFileName'),
    (r'\btgt\.process\.publisher\b', 'Company'),
    (r'\bsrc\.process\.image\.path\b', 'ParentImage'),
    (r'\bsrc\.process\.cmdline\b', 'ParentCommandLine'),
    (r'\bsrc\.process\.user\.name\b', 'User'),
]

# A DV-only comparison with no WEL counterpart (only integrityLevel so far).
_INTEGRITY_CLAUSE = r'[\w.]+\.integrityLevel\s*=\s*"[^"]*"'


def wel_map(pq_body: str) -> str:
    """Rewrite a faithful DV-schema PQ body to query the
    microsoft_windows_eventlog-latest parser instead.

    Strategy:
      - replace tgt.process.* / src.process.* with WEL field names
      - replace `event.type="Process Creation"` with EID filter
      - replace `endpoint.os="windows"` with
        dataSource.name='Windows Event Logs'
      - drop integrityLevel comparisons together with their adjoining
        and/or, leaving surrounding parentheses untouched

    No parser-name pin is prepended; the body is otherwise returned as
    converted, stripped of surrounding whitespace.
    """
    body = pq_body
    for pat, repl in DV_TO_WEL:
        body = re.sub(pat, repl, body)
    body = re.sub(r'event\.type\s*=\s*"Process Creation"',
                  "(EventID=4688 or EventID=1)", body)
    body = re.sub(r'endpoint\.os\s*=\s*"windows"',
                  "dataSource.name='Windows Event Logs'", body)
    # Drop any leftover DV-only field comparisons that didn't map (would
    # otherwise null-filter every row). Only one we've seen: integrityLevel.
    # The clause is removed in leading and trailing position; an opening
    # "(" in front of it is never consumed, so parentheses stay balanced.
    body = re.sub(_INTEGRITY_CLAUSE + r'\s+(?:and|or)\s+', '', body)
    body = re.sub(r'\s+(?:and|or)\s+' + _INTEGRITY_CLAUSE, '', body)
    return body.strip()


def _http_error_text(err: urllib.error.HTTPError, limit: int) -> str:
    """Return up to *limit* characters of an HTTP error body."""
    # errors="replace": a non-UTF-8 error page must not raise while we are
    # already handling a failure.
    return err.read().decode("utf-8", errors="replace")[:limit]


def pq(query: str, hours: int = 24) -> tuple[int, str, int]:
    """Run *query* against /api/powerQuery over the last *hours* hours.

    Returns:
        (http_code, message, row_count). Success is (200, "ok", n) with n
        the length of the response's "values" list. An HTTP error returns
        its status code and up to 250 characters of the body. Any other
        transport or decoding failure returns code 0 with a short
        description, so one bad request cannot abort the whole run.
    """
    end = int(time.time() * 1000)
    start = end - hours * 3600 * 1000
    payload = {"token": SDL_KEY, "query": query,
               "startTime": str(start), "endTime": str(end)}
    req = urllib.request.Request(
        f"{SDL_BASE}/api/powerQuery",
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"}, method="POST")
    try:
        with urllib.request.urlopen(req, timeout=60) as r:
            d = json.loads(r.read())
    except urllib.error.HTTPError as e:
        return e.code, _http_error_text(e, 250), 0
    except (OSError, ValueError) as e:
        # URLError and timeouts are OSError; JSON/Unicode decode errors
        # are ValueError.
        return 0, f"{type(e).__name__}: {e}"[:250], 0
    values = d.get("values") if isinstance(d, dict) else None
    return 200, "ok", len(values or [])


def deploy_rule(name: str, description: str, pq_body: str) -> tuple[int, str]:
    """POST a Scheduled-PQ rule to S1 Mgmt API.

    Returns:
        (200, "created id=RID") on success, (http_code, body excerpt) on an
        HTTP error, or (0, reason) when configuration is missing or the
        request failed before an HTTP status was received.
    """
    if not (S1_CONS and S1_TOK):
        return 0, "no S1_CONSOLE_URL or S1_CONSOLE_API_TOKEN in config"
    if not SITE_ID:
        # An empty id would be sent as siteIds [""].
        return 0, "no SITE_ID set (env var or site discovery)"
    payload = {
        "data": {
            "name": name,
            "description": description,
            "severity": "Medium",
            "expirationMode": "Permanent",
            "queryType": "scheduled",
            "queryLang": "2.0",
            "status": "Draft",
            "treatAsThreat": "UNDEFINED",
            "networkQuarantine": False,
            "coolOffSettings": {"renotifyMinutes": 60},
            "scheduledParams": {
                "query": pq_body,
                "lookbackWindowMinutes": 30,
                "runIntervalMinutes": 5,
                "threshold": {"value": 0, "operator": "Greater"},
            },
        },
        "filter": {"siteIds": [SITE_ID]},
    }
    req = urllib.request.Request(
        f"{S1_CONS}/web/api/v2.1/cloud-detection/rules",
        data=json.dumps(payload).encode(), method="POST")
    req.add_header("Authorization", f"ApiToken {S1_TOK}")
    req.add_header("Content-Type", "application/json")
    req.add_header("Accept", "application/json")
    try:
        with urllib.request.urlopen(req, timeout=30) as r:
            d = json.loads(r.read())
    except urllib.error.HTTPError as e:
        return e.code, _http_error_text(e, 300)
    except (OSError, ValueError) as e:
        return 0, f"{type(e).__name__}: {e}"[:300]
    data = d.get("data") if isinstance(d, dict) else None
    rid = (data or {}).get("id") or "?"
    return 200, f"created id={rid}"


# ============================================================ main =========
def main() -> int:
    """Convert, smoke-test and optionally deploy every rule in WANTED.

    Returns the process exit code: 0 when the run completed, 1 when a
    fatal set-up step (site discovery or rule listing) failed.
    """
    global SITE_ID

    ap = argparse.ArgumentParser()
    ap.add_argument("--deploy", action="store_true",
                    help="Also create each valid PQ as an SDL Scheduled rule.")
    args = ap.parse_args()
    total = len(WANTED)

    print(f"\n{'='*78}\n Sigma -> PowerQuery (faithful + relaxed + WEL) "
          f"-> SDL rule\n{'='*78}\n")
    print(f" Backend : pysigma-backend-sentinelone-pq")
    print(f" Tenant SDL : {SDL_BASE}")
    print(f" Tenant Mgmt API : {S1_CONS}")
    print(f" Deploy rules : {'YES' if args.deploy else 'no (use --deploy)'}")
    print(f" Output : {OUT}\n")

    # Site-id auto-discovery (only needed for --deploy).
    if args.deploy and not SITE_ID:
        if not (S1_CONS and S1_TOK):
            # Without this guard urlopen fails on the relative URL with an
            # unhelpful ValueError.
            print(" FATAL: --deploy requested but S1_CONSOLE_URL or "
                  "S1_CONSOLE_API_TOKEN is missing from config.")
            return 1
        try:
            req = urllib.request.Request(
                f"{S1_CONS}/web/api/v2.1/sites?limit=10")
            req.add_header("Authorization", f"ApiToken {S1_TOK}")
            req.add_header("Accept", "application/json")
            with urllib.request.urlopen(req, timeout=20) as r:
                sites = ((json.loads(r.read()).get("data") or {})
                         .get("sites") or [])
        except urllib.error.HTTPError as e:
            print(f" FATAL site discovery: HTTP {e.code} "
                  f"{_http_error_text(e, 200)}")
            return 1
        except (OSError, ValueError) as e:
            print(f" FATAL site discovery: {type(e).__name__}: {e}")
            return 1
        if not sites:
            print(" FATAL: --deploy requested but no sites visible "
                  "to this token.")
            return 1
        SITE_ID = sites[0]["id"]
        print(f" Site discovered : {SITE_ID} "
              f"({sites[0].get('name')})\n")

    # Pre-fetch the two relevant trees once
    print("--- listing sigmahq/sigma rule paths via gh api ---")
    try:
        pc_only = list_sigma_rules(allow_powershell=False)
        pc_and_pwsh = list_sigma_rules(allow_powershell=True)
    except (RuntimeError, OSError, ValueError,
            subprocess.SubprocessError) as e:
        # gh missing, gh exited non-zero, timed out, or returned non-JSON.
        print(f" FATAL rule listing: {e}")
        return 1
    print(f" process_creation/ : {len(pc_only)} rules")
    print(f" process_creation/ + powershell/ : {len(pc_and_pwsh)} rules\n")

    summary: list[dict[str, Any]] = []
    for i, (tactic, tech, kws, allow_pwsh) in enumerate(WANTED, 1):
        paths = pc_and_pwsh if allow_pwsh else pc_only
        rec: dict[str, Any] = {"i": i, "tactic": tactic, "tech": tech}
        print(f"[{i:02d}/{total}] {tactic} :: {tech}")

        path = pick(paths, kws)
        if not path:
            print(f" PICK : no match for {kws}\n")
            rec["status"] = "no_match"
            summary.append(rec)
            continue
        print(f" PICK : {path}")
        rec["path"] = path

        # Deliberately broad: any failure on one rule is recorded and the
        # loop moves on to the next rule.
        try:
            raw = fetch(f"{SIGMA_RAW}/{path}").decode("utf-8")
        except Exception as e:
            print(f" FETCH : FAIL {e}\n")
            rec["status"] = "fetch_failed"
            summary.append(rec)
            continue
        stem = pathlib.Path(path).stem
        # Explicit UTF-8: rule text may contain non-ASCII characters that
        # a locale default encoding cannot represent.
        (OUT / f"{stem}.yml").write_text(raw, encoding="utf-8")

        try:
            pq_body = convert(raw)
        except Exception as e:
            print(f" CONVERT : FAIL {e}\n")
            rec["status"] = "convert_failed"
            rec["err"] = str(e)
            summary.append(rec)
            continue

        relaxed_body = relax(pq_body)
        wel_body = wel_map(pq_body)
        (OUT / f"{stem}.pq").write_text(pq_body, encoding="utf-8")
        (OUT / f"{stem}.relaxed.pq").write_text(relaxed_body,
                                                 encoding="utf-8")
        (OUT / f"{stem}.wel.pq").write_text(wel_body, encoding="utf-8")
        rec["pq_chars"] = len(pq_body)
        rec["relaxed_chars"] = len(relaxed_body)
        rec["wel_chars"] = len(wel_body)
        print(f" CONVERT : OK faithful={len(pq_body)}c "
              f"relaxed={len(relaxed_body)}c wel={len(wel_body)}c")

        # smoke test all three
        c1, _, r1 = pq(pq_body)
        c2, _, r2 = pq(relaxed_body)
        c3, e3, r3 = pq(wel_body)
        rec.update({"fa_http": c1, "fa_rows": r1,
                    "re_http": c2, "re_rows": r2,
                    "wel_http": c3, "wel_rows": r3,
                    "wel_err": e3 if c3 != 200 else ""})
        print(f" TEST FA : HTTP {c1} rows={r1}")
        print(f" TEST RE : HTTP {c2} rows={r2}")
        print(f" TEST WEL: HTTP {c3} rows={r3}"
              f"{' err=' + e3[:120] if c3 != 200 else ''}")

        # A rule counts as valid when any of the three variants was
        # accepted by the API.
        valid = 200 in (c1, c2, c3)
        if r1 > 0 or r2 > 0 or r3 > 0:
            rec["status"] = "FIRES"
        elif valid:
            rec["status"] = "valid_no_data"
        else:
            rec["status"] = "PQ_ERROR"

        # deploy faithful (only) if requested + valid
        if args.deploy and c1 == 200:
            rule_name = (f"[Sigma->PQ] {tactic} / {tech} "
                         f"({stem})")[:128]
            desc = (f"Auto-converted from SigmaHQ/sigma "
                    f"{path} via pysigma-backend-sentinelone-pq. "
                    f"Faithful S1 DV schema.")
            dc, dmsg = deploy_rule(rule_name, desc, pq_body)
            rec["deploy_http"] = dc
            rec["deploy_msg"] = dmsg
            if dc == 200:
                # dmsg shape is "created id=RID"; extract just the id
                rec["rule_id"] = dmsg.split("id=")[-1].strip()
                rec["pq_file"] = f"{stem}.pq"
            print(f" DEPLOY : HTTP {dc} {dmsg[:160]}")
        print()
        summary.append(rec)

    # --- summary ---
    print(f"{'='*78}\n SUMMARY (rows = events matched in last 24 h)\n"
          f"{'='*78}")
    hdr = (f" {'#':>3} {'tactic':<18}{'technique':<26}"
           f"{'fa':>5}{'re':>5}{'wel':>5} status")
    print(hdr)
    print(" " + "-" * (len(hdr) - 2))
    for s in summary:
        print(f" {s['i']:>3} {s['tactic']:<18}{s['tech']:<26}"
              f"{s.get('fa_rows','-')!s:>5}{s.get('re_rows','-')!s:>5}"
              f"{s.get('wel_rows','-')!s:>5} {s.get('status','-')}")
    fires = sum(1 for s in summary
                if any(s.get(k, 0) and s[k] > 0
                       for k in ('fa_rows', 're_rows', 'wel_rows')))
    valid = sum(1 for s in summary
                if s.get('status') in ('valid_no_data', 'FIRES'))
    failed = sum(1 for s in summary
                 if s.get('status') in ('no_match', 'fetch_failed',
                                        'convert_failed', 'PQ_ERROR'))
    print(f"\n Rules with any matches : {fires}/{total}")
    print(f" Syntactically valid : {valid}/{total}")
    print(f" Failed / not matched : {failed}/{total}")
    if args.deploy:
        deployed = [s for s in summary if s.get('deploy_http') == 200]
        print(f" SDL rules created : {len(deployed)}/{total}")
        # Persist the (rule_id, pq_file) map for verify scripts.
        ids_file = HERE / "deployed_rule_ids.json"
        ids_file.write_text(json.dumps(
            {"tenant": S1_CONS, "site_id": SITE_ID,
             "rules": [{"rule_id": s["rule_id"],
                        "pq_file": s["pq_file"],
                        "tactic": s["tactic"],
                        "tech": s["tech"]} for s in deployed]},
            indent=2))
        print(f" Deployed IDs : {ids_file}")
    print(f" Artefacts : {OUT}/")
    print(f"\n Next steps:")
    print(f" - inspect {OUT}/*.wel.pq for WEL variants")
    print(f" - re-run with --deploy to create SDL Scheduled rules")
    print(f" - verify with verify_rule_exists_via_put.py")
    print(f" - check console UI: {S1_CONS}/#/cloud-detection/rules\n")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())