Initial commit: SIEM Toolkit for SentinelOne

Dockerized SecOps toolkit with:
- Coverage Map: STAR rule vs SDL parser field coverage analysis
- Ingest Dashboard: PowerQuery-powered event volume and source breakdown
- Onboarding Assistant: AI-guided log source onboarding with Claude
- Parser management via SDL MCP integration

Stack: FastAPI + PostgreSQL backend, nginx-served HTML frontend, Docker Compose.
PowerQuery runs via Scalyr XDR API (SDL_XDR_URL + SDL_LOG_READ_KEY).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mick
2026-05-19 11:39:26 -04:00
commit c182d837ee
42 changed files with 2273 additions and 0 deletions
View File
+273
View File
@@ -0,0 +1,273 @@
import json
from fastapi import APIRouter, UploadFile, File, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy.orm import Session
from db import get_db, ParsedRule, ParserField
from services import s1_client, rule_parser
router = APIRouter()
def _star_query_texts(rule: dict) -> list[str]:
"""
Extract all PowerQuery/filter strings from a STAR rule.
Handles simple rules (s1ql) and correlation rules (subQueries[].subQuery).
"""
texts = []
# Simple rules
for field in ("s1ql", "queryLang", "query", "powerQuery"):
v = rule.get(field)
# queryLang "2.0" is a version string, not a query — skip short strings
if v and isinstance(v, str) and len(v) > 5:
texts.append(v)
# Correlation rules: subQueries[].subQuery
cp = rule.get("correlationParams") or {}
for sq in cp.get("subQueries", []):
v = sq.get("subQuery")
if v and isinstance(v, str):
texts.append(v)
# Also handle older conditions[] format
for cond in cp.get("conditions", []):
for key in ("filter", "query", "subQuery"):
v = cond.get(key)
if v and isinstance(v, str):
texts.append(v)
return texts
@router.post("/load-star-rules")
async def load_star_rules(db: Session = Depends(get_db)):
    """Pull the STAR rules from SentinelOne and re-index the fields each one uses.

    Any failure while fetching is reported as HTTP 502. Existing rows with
    ``rule_type="star"`` are deleted before the fresh set is inserted.
    """
    try:
        fetched = await s1_client.get_star_rules()
    except Exception as e:
        raise HTTPException(502, f"S1 API error: {e}")

    # Drop the previous STAR snapshot first so re-inserting the same rule ids
    # does not hit duplicate key errors.
    db.query(ParsedRule).filter_by(rule_type="star").delete()
    db.flush()

    summary = []
    for star_rule in fetched:
        # Union of the fields referenced by every query text of this rule.
        collected: set = set()
        for query_text in _star_query_texts(star_rule):
            collected |= rule_parser.extract_star_fields(query_text)
        field_list = list(collected)

        row = ParsedRule(
            rule_id=str(star_rule.get("id", "")),
            name=star_rule.get("name", "unnamed"),
            rule_type="star",
            fields_used=field_list,
            raw=json.dumps(star_rule),
        )
        db.add(row)
        summary.append({"id": row.rule_id, "name": row.name, "fields": field_list})

    db.commit()
    return {"loaded": len(summary), "rules": summary}
@router.post("/upload-sigma")
async def upload_sigma(files: list[UploadFile] = File(...), db: Session = Depends(get_db)):
    """Index the fields referenced by each uploaded Sigma YAML file.

    Every file is stored as a ``ParsedRule`` keyed by ``sigma_<filename>``;
    ``merge`` is used so re-uploading the same filename goes through the
    same key instead of adding a second row.
    """
    summary = []
    for upload in files:
        raw_bytes = await upload.read()
        # Undecodable bytes are replaced rather than rejected.
        yaml_text = raw_bytes.decode("utf-8", errors="replace")
        field_list = list(rule_parser.extract_sigma_fields(yaml_text))

        db.merge(
            ParsedRule(
                rule_id=f"sigma_{upload.filename}",
                name=upload.filename or "unnamed",
                rule_type="sigma",
                fields_used=field_list,
                raw=yaml_text,
            )
        )
        summary.append({"name": upload.filename, "fields": field_list})

    db.commit()
    return {"loaded": len(summary), "rules": summary}
@router.post("/load-parsers-from-sdl")
async def load_parsers_from_sdl(db: Session = Depends(get_db)):
    """
    Load SDL parsers from the local /app/parsers directory (mounted from ./parsers/).
    Files are placed there by the MCP-based loader or by manual copy.

    For each non-hidden regular file, the existing ``ParserField`` rows for
    that parser name are replaced with the freshly extracted fields.

    Raises:
        HTTPException 503: the directory is missing or is not a directory.
        HTTPException 422: the directory contains no parser files.

    Returns a dict with the number of parsers loaded, a per-parser field
    summary, and a list of per-file errors (one bad file does not stop the rest).
    """
    import os

    parsers_dir = "/app/parsers"
    try:
        # The context manager closes the directory handle deterministically
        # instead of leaving it to garbage collection.
        with os.scandir(parsers_dir) as listing:
            entries = [
                e for e in listing
                if e.is_file() and not e.name.startswith(".")
            ]
    except (FileNotFoundError, NotADirectoryError) as exc:
        raise HTTPException(
            503, "parsers/ directory not found — check Docker volume mount"
        ) from exc

    if not entries:
        raise HTTPException(
            422,
            "No parser files found in parsers/ directory. "
            "Use 'Load SDL Parsers via MCP' in Claude Code to populate it, "
            "or upload a parser file manually."
        )

    loaded = []
    errors = []
    for entry in entries:
        name = entry.name
        try:
            with open(entry.path, "r", encoding="utf-8", errors="replace") as fh:
                content = fh.read()

            fields: set = set()
            try:
                # Structured extraction only applies when the file is JSON.
                fields = rule_parser.extract_parser_fields(json.loads(content))
            except Exception:
                # Deliberate best-effort: non-JSON parser files (or JSON of an
                # unexpected shape) fall through to content-string extraction.
                pass
            fields |= rule_parser.extract_parser_fields_from_content(content)

            # Replace, rather than append to, this parser's indexed fields.
            db.query(ParserField).filter_by(parser_name=name).delete()
            for f in fields:
                db.add(ParserField(parser_name=name, field_name=f, field_type="string"))
            loaded.append({"parser": name, "fields": list(fields), "field_count": len(fields)})
        except Exception as e:
            errors.append({"parser": name, "error": str(e)})

    db.commit()
    return {"loaded": len(loaded), "parsers": loaded, "errors": errors}
@router.post("/upload-parser")
async def upload_parser(file: UploadFile = File(...), db: Session = Depends(get_db)):
    """Index the output fields of a single uploaded SDL parser file.

    The indexed fields for the uploaded filename are replaced with the
    union of two extraction passes over the file's text.
    """
    text = (await file.read()).decode("utf-8", errors="replace")
    parser_name = file.filename

    # Pass 1: structured extraction, only possible when the upload is valid JSON.
    extracted: set = set()
    try:
        extracted = rule_parser.extract_parser_fields(json.loads(text))
    except json.JSONDecodeError:
        pass

    # Pass 2: always scan the raw text too (catches $field$ SDL format strings).
    extracted |= rule_parser.extract_parser_fields_from_content(text)

    db.query(ParserField).filter_by(parser_name=parser_name).delete()
    for field_name in extracted:
        db.add(
            ParserField(parser_name=parser_name, field_name=field_name, field_type="string")
        )
    db.commit()

    return {"parser": parser_name, "fields": list(extracted)}
class ParserContentPayload(BaseModel):
    """Request body for POST /load-parser-content."""

    # Name under which the extracted fields are stored (ParserField.parser_name);
    # existing rows with this name are deleted before the new ones are added.
    parser_name: str
    content: str  # raw SDL parser file content as string
@router.post("/load-parser-content")
async def load_parser_content(payload: ParserContentPayload, db: Session = Depends(get_db)):
    """
    Index the output fields of an SDL parser supplied as a raw string.

    Used by MCP-based loader scripts since the SDL HTTP API endpoint is not
    accessible from inside Docker with standard API token auth.
    Responds with HTTP 422 when neither extraction pass yields any field.
    """
    raw_text = payload.content
    target = payload.parser_name

    # Structured pass (attributes/fields/mappings); any failure here is
    # ignored so the format-string pass below still runs.
    found: set = set()
    try:
        found = rule_parser.extract_parser_fields(json.loads(raw_text))
    except Exception:
        pass

    # Format-string pass ($field.name$ patterns) always runs.
    found |= rule_parser.extract_parser_fields_from_content(raw_text)

    if not found:
        raise HTTPException(422, "No fields could be extracted from the parser content")

    db.query(ParserField).filter_by(parser_name=target).delete()
    for field_name in found:
        db.add(ParserField(parser_name=target, field_name=field_name, field_type="string"))
    db.commit()

    return {"parser": target, "fields": list(found), "field_count": len(found)}
@router.get("/map")
def get_coverage_map(db: Session = Depends(get_db)):
    """Compare the fields produced by parsers with the fields used by rules.

    Each field seen on either side is classified as ``covered`` (parser and
    rules), ``unused`` (parser only) or ``missing_parser`` (rules only).
    """
    stored_rules = db.query(ParsedRule).all()
    parser_rows = db.query(ParserField).all()

    # field -> rules that use it, and field -> data sources named by those rules
    users_by_field: dict[str, list] = {}
    sources_by_field: dict[str, set] = {}
    for stored in stored_rules:
        # Query texts are only re-derived for STAR rules.
        if stored.rule_type == "star":
            texts = _star_query_texts(json.loads(stored.raw))
        else:
            texts = []
        sources = rule_parser.extract_data_sources(texts)

        for name in stored.fields_used or []:
            users_by_field.setdefault(name, []).append(
                {"rule": stored.name, "type": stored.rule_type}
            )
            sources_by_field.setdefault(name, set()).update(sources)

    # field -> parser name (a later row for the same field overwrites an earlier one)
    parser_by_field: dict[str, str] = {}
    for row in parser_rows:
        parser_by_field[row.field_name] = row.parser_name

    detail = {}
    for name in set(users_by_field) | set(parser_by_field):
        has_parser = name in parser_by_field
        has_rules = name in users_by_field

        if has_parser and has_rules:
            status = "covered"
        elif has_parser:
            status = "unused"
        else:
            status = "missing_parser"

        users = users_by_field.get(name, [])
        detail[name] = {
            "in_parser": has_parser,
            "parser_name": parser_by_field.get(name),
            "data_sources": sorted(sources_by_field.get(name, set())),
            "rule_count": len(users),
            "rules": users,
            "status": status,
        }

    # Bucket field names by status in a single pass, keeping `detail` order.
    by_status: dict[str, list] = {"unused": [], "missing_parser": [], "covered": []}
    for name, info in detail.items():
        by_status[info["status"]].append(name)

    summary = {
        "total_parser_fields": len(parser_by_field),
        "total_rule_fields": len(users_by_field),
        "covered": len(by_status["covered"]),
        "parsed_but_unused": len(by_status["unused"]),
        "rules_missing_parser": len(by_status["missing_parser"]),
    }
    return {
        "summary": summary,
        "parsed_but_unused": by_status["unused"],
        "rules_missing_parser": by_status["missing_parser"],
        "fields": detail,
    }
@router.delete("/reset")
def reset_data(db: Session = Depends(get_db)):
    """Delete every stored rule row and parser-field row, then commit."""
    for model in (ParsedRule, ParserField):
        db.query(model).delete()
    db.commit()
    return {"cleared": True}
+101
View File
@@ -0,0 +1,101 @@
from datetime import datetime, timedelta
from fastapi import APIRouter, Query, HTTPException
from pydantic import BaseModel
from services import s1_client
router = APIRouter()
def _date_range(days: int) -> tuple[str, str]:
now = datetime.utcnow()
return (
(now - timedelta(days=days)).strftime("%Y-%m-%dT%H:%M:%S.000Z"),
now.strftime("%Y-%m-%dT%H:%M:%S.000Z"),
)
@router.get("/top-sources")
async def get_top_sources(days: int = Query(7, ge=1, le=90)):
    """Return the 25 log sources with the most events in the last *days* days.

    A failing PowerQuery call is reported as HTTP 502.
    """
    window_start, window_end = _date_range(days)
    try:
        response = await s1_client.run_powerquery(
            "| group events=count() by dataSource.name | sort -events | limit 25",
            window_start,
            window_end,
        )
    except Exception as e:
        raise HTTPException(502, f"PowerQuery error: {e}")

    rows = response.get("events", [])
    return {"period_days": days, "data": rows}
@router.get("/by-event-type")
async def get_by_event_type(days: int = Query(7, ge=1, le=90)):
    """Return event counts per (data source, event type) pair, top 100 rows.

    A failing PowerQuery call is reported as HTTP 502.
    """
    window_start, window_end = _date_range(days)
    try:
        response = await s1_client.run_powerquery(
            "| group events=count() by dataSource.name, event.type | sort -events | limit 100",
            window_start,
            window_end,
        )
    except Exception as e:
        raise HTTPException(502, f"PowerQuery error: {e}")

    rows = response.get("events", [])
    return {"period_days": days, "data": rows}
@router.get("/daily-volume")
async def get_daily_volume(days: int = Query(7, ge=1, le=14)):
    """Total event count per day, oldest day first.

    At most 7 daily points are returned even when *days* is larger. Each
    point is one PowerQuery over a midnight-to-midnight UTC window; a day
    whose query fails is reported with ``events: 0`` instead of failing the
    whole request.
    """
    import asyncio
    from datetime import timezone

    # Read the clock once. The loop sleeps between queries, so re-reading it
    # per iteration could cross midnight mid-request and shift the later
    # windows, duplicating one day in the result.
    today = datetime.now(timezone.utc)

    results = []
    points = min(days, 7)
    for i in range(points):
        day_start = today - timedelta(days=i + 1)
        day_end = today - timedelta(days=i)
        day_from = day_start.strftime("%Y-%m-%dT00:00:00.000Z")
        day_to = day_end.strftime("%Y-%m-%dT00:00:00.000Z")
        label = day_start.strftime("%Y-%m-%d")
        try:
            result = await s1_client.run_powerquery("| group total=count()", day_from, day_to)
            events_list = result.get("events") if isinstance(result, dict) else []
            count = events_list[0].get("total", 0) if isinstance(events_list, list) and events_list else 0
        except Exception:
            # Deliberate best-effort: a failed day becomes a zero data point.
            count = 0
        results.append({"date": label, "events": count})
        if i < points - 1:
            # Pause between consecutive queries (presumably to stay under an
            # API rate limit — TODO confirm); skipped after the last one.
            await asyncio.sleep(3)

    # Queries ran newest-first; return chronological order.
    return list(reversed(results))
class FilterRule(BaseModel):
    """Request body for POST /simulate-filter."""

    source: str = ""  # when non-empty, adds a `src.name = "<source>"` clause
    event_type: str = ""  # when non-empty, adds an `event.type = "<event_type>"` clause
    days: int = 7  # look-back window in days; also the divisor for the monthly projection
    gb_per_million_events: float = 0.5  # multiplier used to convert event counts into GB
def _pq_string_literal(value: str) -> str:
    """Render *value* as a double-quoted PowerQuery string literal."""
    # Escape backslashes first, then quotes, so a value containing `"` cannot
    # terminate the literal and inject extra query syntax.
    # NOTE(review): assumes PowerQuery honours backslash escapes inside
    # double-quoted strings — confirm against the query language reference.
    escaped = value.replace("\\", "\\\\").replace('"', '\\"')
    return f'"{escaped}"'


@router.post("/simulate-filter")
async def simulate_filter(rule: FilterRule):
    """Estimate how many events and GB would be eliminated by an exclusion filter.

    Counts the events matching the optional source / event-type clauses over
    the last ``rule.days`` days, converts the count to GB using
    ``rule.gb_per_million_events``, and scales both to a 30-day projection.

    Raises:
        HTTPException 422: ``rule.days`` is less than 1.
        HTTPException 502: the PowerQuery call or result handling failed.
    """
    # days is the divisor of the monthly projection and the query window
    # length; zero or negative values are rejected before any work is done.
    if rule.days < 1:
        raise HTTPException(422, "days must be at least 1")

    from_dt, to_dt = _date_range(rule.days)

    clauses = []
    if rule.source:
        # NOTE(review): this filters on src.name while the other endpoints in
        # this module group by dataSource.name — confirm the field is intended.
        clauses.append(f"src.name = {_pq_string_literal(rule.source)}")
    if rule.event_type:
        clauses.append(f"event.type = {_pq_string_literal(rule.event_type)}")
    filter_expr = " AND ".join(clauses) if clauses else "true"
    query = f"| filter {filter_expr} | count() as events"

    try:
        result = await s1_client.run_powerquery(query, from_dt, to_dt)
        rows = result.get("events")
        # A missing, empty or non-list result counts as zero matched events.
        events = rows[0].get("events", 0) if isinstance(rows, list) and rows else 0
    except Exception as e:
        raise HTTPException(502, f"PowerQuery error: {e}")

    estimated_gb = round(events / 1_000_000 * rule.gb_per_million_events, 3)
    monthly_events = int(events / rule.days * 30)
    monthly_gb = round(monthly_events / 1_000_000 * rule.gb_per_million_events, 2)
    return {
        "period_days": rule.days,
        "matched_events": events,
        "estimated_gb_period": estimated_gb,
        "projected_monthly_events": monthly_events,
        "projected_monthly_gb": monthly_gb,
        "filter": {"source": rule.source, "event_type": rule.event_type},
    }