mirror of
https://github.com/marcredhat/kql
synced 2026-06-08 21:27:09 +00:00
Initial commit: KQL ↔ SDL PowerQuery proof of equivalence
This commit is contained in:
@@ -0,0 +1,34 @@
|
||||
#!/usr/bin/env python3
"""Count duplicate timestamps within the generated JSONL.

SDL appears to dedupe addEvents by (session, ts) - events sharing a ts
within the same session are silently dropped. If our generator emits many
events at colliding ts_epoch_ms values, only one of each cluster survives.

For every ``event_type`` the report prints the number of events, the number
of distinct ``ts_epoch_ms`` values, and the percentage of events whose
timestamp duplicates another event of the same type.

NOTE(review): collisions are grouped by ``event_type`` here, while the dedupe
described above is per session - confirm that grouping by event type is the
right proxy for a session.
"""
import json
from collections import Counter, defaultdict
from pathlib import Path

# Default input: <parent of this script's directory>/sample_data/events.jsonl
JSONL = Path(__file__).resolve().parents[1] / "sample_data" / "events.jsonl"


def _tally(lines):
    """Return ``(totals, unique_ts)`` keyed by event type for JSONL *lines*.

    ``totals`` maps event type -> number of records seen; ``unique_ts`` maps
    event type -> set of distinct ``ts_epoch_ms`` values.  Whitespace-only
    lines are skipped so a stray blank line does not abort the report.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
        KeyError: if a record lacks ``event_type`` or ``ts_epoch_ms``.
    """
    totals = Counter()
    unique_ts = defaultdict(set)
    for line in lines:
        if not line.strip():
            continue
        record = json.loads(line)
        # Read both fields before mutating any counter so a malformed record
        # cannot leave the tallies half-updated.
        event_type = record["event_type"]
        ts = record["ts_epoch_ms"]
        totals[event_type] += 1
        unique_ts[event_type].add(ts)
    return totals, unique_ts


def main(path=JSONL):
    """Print the per-event-type timestamp collision report for *path*.

    Args:
        path: JSONL file to analyse (``str`` or ``Path``); defaults to the
            repository's sample data file.
    """
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with Path(path).open(encoding="utf-8") as f:
        totals, unique_ts = _tally(f)

    rule = "-" * 70
    print(f"{'event_type':30s} {'events':>8} {'uniq_ts':>8} {'collision_loss%':>16}")
    print(rule)
    for et in sorted(totals):
        n = totals[et]
        u = len(unique_ts[et])
        # Share of events that would vanish if only one event per ts survives.
        loss = 100 * (n - u) / n if n else 0
        print(f"{et:30s} {n:>8} {u:>8} {loss:>15.1f}%")
    print(rule)
    print(f"{'TOTAL':30s} {sum(totals.values()):>8} "
          f"{sum(len(s) for s in unique_ts.values()):>8}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user