commit c182d837ee14700d5ddc0988d90a36c8250056ab Author: Mick <119439091+mickbrowns1@users.noreply.github.com> Date: Tue May 19 11:39:26 2026 -0400 Initial commit: SIEM Toolkit for SentinelOne Dockerized SecOps toolkit with: - Coverage Map: STAR rule vs SDL parser field coverage analysis - Ingest Dashboard: PowerQuery-powered event volume and source breakdown - Onboarding Assistant: AI-guided log source onboarding with Claude - Parser management via SDL MCP integration Stack: FastAPI + PostgreSQL backend, nginx-served HTML frontend, Docker Compose. PowerQuery runs via Scalyr XDR API (SDL_XDR_URL + SDL_LOG_READ_KEY). Co-Authored-By: Claude Sonnet 4.6 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..84e80c6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +.env +mcp_config.txt +__pycache__/ +*.pyc +node_modules/ +.next/ +frontend/out/ +pgdata/ +parsers/*.json diff --git a/backend/Dockerfile b/backend/Dockerfile new file mode 100644 index 0000000..061e6ff --- /dev/null +++ b/backend/Dockerfile @@ -0,0 +1,10 @@ +FROM python:3.12-slim + +WORKDIR /app + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY . . + +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"] diff --git a/backend/db.py b/backend/db.py new file mode 100644 index 0000000..0b4a9d9 --- /dev/null +++ b/backend/db.py @@ -0,0 +1,46 @@ +import os +from sqlalchemy import create_engine, Column, Integer, String, Float, DateTime, Text +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import declarative_base, sessionmaker +from datetime import datetime + +DATABASE_URL = os.environ.get("DATABASE_URL", "postgresql://siem:siem@db:5432/siem") + +engine = create_engine(DATABASE_URL) +SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) +Base = declarative_base() + + +class ParsedRule(Base): + __tablename__ = "parsed_rules" + id = Column(Integer, primary_key=True) + rule_id = Column(String, unique=True, index=True) + name = Column(String) + rule_type = Column(String) # 'star' or 'sigma' + fields_used = Column(JSONB) + raw = Column(Text) + cached_at = Column(DateTime, default=datetime.utcnow) + + +class ParserField(Base): + __tablename__ = "parser_fields" + id = Column(Integer, primary_key=True) + parser_name = Column(String, index=True) + field_name = Column(String) + field_type = Column(String) + + +class IngestSnapshot(Base): + __tablename__ = "ingest_snapshots" + id = Column(Integer, primary_key=True) + period_days = Column(Integer) + data = Column(JSONB) + recorded_at = Column(DateTime, default=datetime.utcnow) + + +def get_db(): + db = SessionLocal() + try: + yield db + finally: + db.close() diff --git a/backend/main.py b/backend/main.py new file mode 100644 index 0000000..851aa51 --- /dev/null +++ b/backend/main.py @@ -0,0 +1,24 @@ +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware +from db import engine, Base +from routers import coverage, ingest + +Base.metadata.create_all(bind=engine) + +app = FastAPI(title="SIEM Toolkit", version="1.0.0") + +app.add_middleware( + CORSMiddleware, + allow_origins=["http://localhost:3001"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +app.include_router(coverage.router, prefix="/api/coverage", tags=["Coverage"]) +app.include_router(ingest.router, prefix="/api/ingest", tags=["Ingest"]) + + +@app.get("/health") +def health(): + return {"status": "ok"} diff --git a/backend/requirements.txt b/backend/requirements.txt new file mode 100644 index 0000000..d242925 --- /dev/null +++ b/backend/requirements.txt @@ -0,0 +1,9 @@ +fastapi==0.115.0 +uvicorn[standard]==0.30.0 +httpx==0.27.2 +psycopg2-binary==2.9.9 +sqlalchemy==2.0.36 +pydantic==2.9.2 +pydantic-settings==2.6.1 +pyyaml==6.0.2 +python-multipart==0.0.12 diff --git a/backend/routers/__init__.py b/backend/routers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/routers/coverage.py b/backend/routers/coverage.py new file mode 100644 index 0000000..37fabe5 --- /dev/null +++ b/backend/routers/coverage.py @@ -0,0 +1,273 @@ +import json +from fastapi import APIRouter, UploadFile, File, Depends, HTTPException +from pydantic import BaseModel +from sqlalchemy.orm import Session +from db import get_db, ParsedRule, ParserField +from services import s1_client, rule_parser + +router = APIRouter() + + +def _star_query_texts(rule: dict) -> list[str]: + """ + Extract all PowerQuery/filter strings from a STAR rule. + Handles simple rules (s1ql) and correlation rules (subQueries[].subQuery). + """ + texts = [] + + # Simple rules + for field in ("s1ql", "queryLang", "query", "powerQuery"): + v = rule.get(field) + # queryLang "2.0" is a version string, not a query — skip short strings + if v and isinstance(v, str) and len(v) > 5: + texts.append(v) + + # Correlation rules: subQueries[].subQuery + cp = rule.get("correlationParams") or {} + for sq in cp.get("subQueries", []): + v = sq.get("subQuery") + if v and isinstance(v, str): + texts.append(v) + # Also handle older conditions[] format + for cond in cp.get("conditions", []): + for key in ("filter", "query", "subQuery"): + v = cond.get(key) + if v and isinstance(v, str): + texts.append(v) + + return texts + + +@router.post("/load-star-rules") +async def load_star_rules(db: Session = Depends(get_db)): + """Fetch STAR rules from SentinelOne and index their fields.""" + try: + rules = await s1_client.get_star_rules() + except Exception as e: + raise HTTPException(502, f"S1 API error: {e}") + + # Replace all existing STAR rules cleanly to avoid duplicate key errors + db.query(ParsedRule).filter_by(rule_type="star").delete() + db.flush() + + loaded = [] + for rule in rules: + all_fields: set = set() + for qt in _star_query_texts(rule): + all_fields |= rule_parser.extract_star_fields(qt) + fields = list(all_fields) + record = ParsedRule( + rule_id=str(rule.get("id", "")), + name=rule.get("name", "unnamed"), + rule_type="star", + fields_used=fields, + raw=json.dumps(rule), + ) + db.add(record) + loaded.append({"id": record.rule_id, "name": record.name, "fields": fields}) + + db.commit() + return {"loaded": len(loaded), "rules": loaded} + + +@router.post("/upload-sigma") +async def upload_sigma(files: list[UploadFile] = File(...), db: Session = Depends(get_db)): + """Upload one or more Sigma YAML files and index their fields.""" + loaded = [] + for file in files: + content = (await file.read()).decode("utf-8", errors="replace") + fields = list(rule_parser.extract_sigma_fields(content)) + record = ParsedRule( + rule_id=f"sigma_{file.filename}", + name=file.filename or "unnamed", + rule_type="sigma", + fields_used=fields, + raw=content, + ) + db.merge(record) + loaded.append({"name": file.filename, "fields": fields}) + + db.commit() + return {"loaded": len(loaded), "rules": loaded} + + +@router.post("/load-parsers-from-sdl") +async def load_parsers_from_sdl(db: Session = Depends(get_db)): + """ + Load SDL parsers from the local /app/parsers directory (mounted from ./parsers/). + Files are placed there by the MCP-based loader or by manual copy. + Falls back to a clear error if the directory is empty. + """ + import os + parsers_dir = "/app/parsers" + + try: + entries = [ + e for e in os.scandir(parsers_dir) + if e.is_file() and not e.name.startswith(".") + ] + except FileNotFoundError: + raise HTTPException(503, "parsers/ directory not found — check Docker volume mount") + + if not entries: + raise HTTPException( + 422, + "No parser files found in parsers/ directory. " + "Use 'Load SDL Parsers via MCP' in Claude Code to populate it, " + "or upload a parser file manually." + ) + + loaded = [] + errors = [] + for entry in entries: + try: + with open(entry.path, "r", encoding="utf-8", errors="replace") as fh: + content = fh.read() + + fields: set = set() + try: + import json as _json + parser_data = _json.loads(content) + fields = rule_parser.extract_parser_fields(parser_data) + except Exception: + pass + fields |= rule_parser.extract_parser_fields_from_content(content) + + name = entry.name + db.query(ParserField).filter_by(parser_name=name).delete() + for f in fields: + db.add(ParserField(parser_name=name, field_name=f, field_type="string")) + loaded.append({"parser": name, "fields": list(fields), "field_count": len(fields)}) + except Exception as e: + errors.append({"parser": entry.name, "error": str(e)}) + + db.commit() + return {"loaded": len(loaded), "parsers": loaded, "errors": errors} + + +@router.post("/upload-parser") +async def upload_parser(file: UploadFile = File(...), db: Session = Depends(get_db)): + """Upload an SDL parser JSON file and index its output fields.""" + raw_bytes = await file.read() + content_str = raw_bytes.decode("utf-8", errors="replace") + + # Try structured JSON extraction first, fall back to content-string extraction + fields: set = set() + try: + parser_data = json.loads(content_str) + fields = rule_parser.extract_parser_fields(parser_data) + except json.JSONDecodeError: + pass + + # Always also run content-string extraction (catches $field$ SDL format strings) + fields |= rule_parser.extract_parser_fields_from_content(content_str) + + db.query(ParserField).filter_by(parser_name=file.filename).delete() + for f in fields: + db.add(ParserField(parser_name=file.filename, field_name=f, field_type="string")) + + db.commit() + return {"parser": file.filename, "fields": list(fields)} + + +class ParserContentPayload(BaseModel): + parser_name: str + content: str # raw SDL parser file content as string + + +@router.post("/load-parser-content") +async def load_parser_content(payload: ParserContentPayload, db: Session = Depends(get_db)): + """ + Accept raw SDL parser content (as a string) and index its output fields. + Used by MCP-based loader scripts since the SDL HTTP API endpoint is not + accessible from inside Docker with standard API token auth. + """ + fields: set = set() + + # Try JSON parsing first (structured attributes/fields/mappings) + try: + parser_data = json.loads(payload.content) + fields = rule_parser.extract_parser_fields(parser_data) + except (json.JSONDecodeError, Exception): + pass + + # Always run SDL format-string extraction ($field.name$ patterns) + fields |= rule_parser.extract_parser_fields_from_content(payload.content) + + if not fields: + raise HTTPException(422, "No fields could be extracted from the parser content") + + db.query(ParserField).filter_by(parser_name=payload.parser_name).delete() + for f in fields: + db.add(ParserField(parser_name=payload.parser_name, field_name=f, field_type="string")) + + db.commit() + return {"parser": payload.parser_name, "fields": list(fields), "field_count": len(fields)} + + +@router.get("/map") +def get_coverage_map(db: Session = Depends(get_db)): + """Return coverage analysis: parser fields vs rule fields.""" + rules = db.query(ParsedRule).all() + parser_fields_rows = db.query(ParserField).all() + + # field → list of rules using it + data sources referenced by those rules + rule_field_index: dict[str, list] = {} + rule_ds_index: dict[str, set] = {} # field → set of dataSource.name values + for rule in rules: + query_texts = _star_query_texts(json.loads(rule.raw)) if rule.rule_type == "star" else [] + data_sources = rule_parser.extract_data_sources(query_texts) + for field in rule.fields_used or []: + rule_field_index.setdefault(field, []).append( + {"rule": rule.name, "type": rule.rule_type} + ) + rule_ds_index.setdefault(field, set()).update(data_sources) + + # field → parser name + parser_field_index: dict[str, str] = { + pf.field_name: pf.parser_name for pf in parser_fields_rows + } + + all_fields = set(rule_field_index) | set(parser_field_index) + + detail = {} + for f in all_fields: + in_parser = f in parser_field_index + in_rules = f in rule_field_index + detail[f] = { + "in_parser": in_parser, + "parser_name": parser_field_index.get(f), + "data_sources": sorted(rule_ds_index.get(f, set())), + "rule_count": len(rule_field_index.get(f, [])), + "rules": rule_field_index.get(f, []), + "status": ( + "covered" if in_parser and in_rules + else "unused" if in_parser and not in_rules + else "missing_parser" + ), + } + + parsed_unused = [f for f, d in detail.items() if d["status"] == "unused"] + missing_parser = [f for f, d in detail.items() if d["status"] == "missing_parser"] + covered = [f for f, d in detail.items() if d["status"] == "covered"] + + return { + "summary": { + "total_parser_fields": len(parser_field_index), + "total_rule_fields": len(rule_field_index), + "covered": len(covered), + "parsed_but_unused": len(parsed_unused), + "rules_missing_parser": len(missing_parser), + }, + "parsed_but_unused": parsed_unused, + "rules_missing_parser": missing_parser, + "fields": detail, + } + + +@router.delete("/reset") +def reset_data(db: Session = Depends(get_db)): + db.query(ParsedRule).delete() + db.query(ParserField).delete() + db.commit() + return {"cleared": True} diff --git a/backend/routers/ingest.py b/backend/routers/ingest.py new file mode 100644 index 0000000..b343a2d --- /dev/null +++ b/backend/routers/ingest.py @@ -0,0 +1,101 @@ +from datetime import datetime, timedelta +from fastapi import APIRouter, Query, HTTPException +from pydantic import BaseModel +from services import s1_client + +router = APIRouter() + + +def _date_range(days: int) -> tuple[str, str]: + now = datetime.utcnow() + return ( + (now - timedelta(days=days)).strftime("%Y-%m-%dT%H:%M:%S.000Z"), + now.strftime("%Y-%m-%dT%H:%M:%S.000Z"), + ) + + +@router.get("/top-sources") +async def get_top_sources(days: int = Query(7, ge=1, le=90)): + """Top log sources by event count over the given period.""" + from_dt, to_dt = _date_range(days) + query = "| group events=count() by dataSource.name | sort -events | limit 25" + try: + result = await s1_client.run_powerquery(query, from_dt, to_dt) + except Exception as e: + raise HTTPException(502, f"PowerQuery error: {e}") + return {"period_days": days, "data": result.get("events", [])} + + +@router.get("/by-event-type") +async def get_by_event_type(days: int = Query(7, ge=1, le=90)): + """Event counts grouped by source and event type.""" + from_dt, to_dt = _date_range(days) + query = "| group events=count() by dataSource.name, event.type | sort -events | limit 100" + try: + result = await s1_client.run_powerquery(query, from_dt, to_dt) + except Exception as e: + raise HTTPException(502, f"PowerQuery error: {e}") + return {"period_days": days, "data": result.get("events", [])} + + +@router.get("/daily-volume") +async def get_daily_volume(days: int = Query(7, ge=1, le=14)): + """Total event count per day.""" + import asyncio + results = [] + points = min(days, 7) + for i in range(points): + day_from = (datetime.utcnow() - timedelta(days=i + 1)).strftime("%Y-%m-%dT00:00:00.000Z") + day_to = (datetime.utcnow() - timedelta(days=i)).strftime("%Y-%m-%dT00:00:00.000Z") + label = (datetime.utcnow() - timedelta(days=i + 1)).strftime("%Y-%m-%d") + try: + result = await s1_client.run_powerquery("| group total=count()", day_from, day_to) + events_list = result.get("events") if isinstance(result, dict) else [] + count = events_list[0].get("total", 0) if isinstance(events_list, list) and events_list else 0 + except Exception: + count = 0 + results.append({"date": label, "events": count}) + if i < points - 1: + await asyncio.sleep(3) + return list(reversed(results)) + + +class FilterRule(BaseModel): + source: str = "" + event_type: str = "" + days: int = 7 + gb_per_million_events: float = 0.5 + + +@router.post("/simulate-filter") +async def simulate_filter(rule: FilterRule): + """Estimate how many events and GB would be eliminated by an exclusion filter.""" + from_dt, to_dt = _date_range(rule.days) + + clauses = [] + if rule.source: + clauses.append(f'src.name = "{rule.source}"') + if rule.event_type: + clauses.append(f'event.type = "{rule.event_type}"') + + filter_expr = " AND ".join(clauses) if clauses else "true" + query = f"| filter {filter_expr} | count() as events" + + try: + result = await s1_client.run_powerquery(query, from_dt, to_dt) + events = (result.get("events") or [{}])[0].get("events", 0) if isinstance(result.get("events"), list) else 0 + except Exception as e: + raise HTTPException(502, f"PowerQuery error: {e}") + + estimated_gb = round(events / 1_000_000 * rule.gb_per_million_events, 3) + monthly_events = int(events / rule.days * 30) + monthly_gb = round(monthly_events / 1_000_000 * rule.gb_per_million_events, 2) + + return { + "period_days": rule.days, + "matched_events": events, + "estimated_gb_period": estimated_gb, + "projected_monthly_events": monthly_events, + "projected_monthly_gb": monthly_gb, + "filter": {"source": rule.source, "event_type": rule.event_type}, + } diff --git a/backend/services/__init__.py b/backend/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/services/rule_parser.py b/backend/services/rule_parser.py new file mode 100644 index 0000000..7287f7a --- /dev/null +++ b/backend/services/rule_parser.py @@ -0,0 +1,209 @@ +import re +import json +import yaml +from typing import Set, List + +_DS_PATTERN = re.compile( + r"dataSource\.name\s*[=in]+\s*[\('\"]([^'\"),]+)['\")]", + re.IGNORECASE, +) + + +# STAR PowerQuery operators that follow a field name +_STAR_OPS = [ + "ContainsCIS", "NotContainsCIS", "Contains", "NotContains", + "StartsWith", "EndsWith", "In", "NotIn", + "IsEmpty", "IsNotEmpty", "Matches", "NotMatches", + "GreaterOrEqual", "LessOrEqual", "GreaterThan", "LessThan", + "Between", "=", "!=", +] +_STAR_KEYWORD = {"and", "or", "not", "true", "false", "null"} +_OP_PATTERN = re.compile( + r"([\w.]+)\s*(?:" + "|".join(re.escape(op) for op in _STAR_OPS) + r")\b" + r"|([\w.]+)\s*=", # also catch field= (no-space form used in subQuery strings) + re.IGNORECASE, +) + + +def extract_star_fields(query: str) -> Set[str]: + """Extract field names referenced in a STAR PowerQuery/subQuery string.""" + fields: Set[str] = set() + for match in _OP_PATTERN.finditer(query): + field = match.group(1) or match.group(2) + if field and field.lower() not in _STAR_KEYWORD and not field[0].isdigit(): + fields.add(field) + return fields + + +def extract_sigma_fields(sigma_content: str) -> Set[str]: + """Extract field names from a Sigma rule YAML.""" + try: + rule = yaml.safe_load(sigma_content) + except Exception: + return set() + + fields: Set[str] = set() + detection = rule.get("detection", {}) if isinstance(rule, dict) else {} + + def _walk(node): + if isinstance(node, dict): + for key, val in node.items(): + if key == "condition": + continue + # Strip pipe modifiers: CommandLine|contains → CommandLine + clean = key.split("|")[0] + if clean and clean not in ("keywords",): + fields.add(clean) + _walk(val) + elif isinstance(node, list): + for item in node: + _walk(item) + + _walk(detection) + return fields + + +def extract_data_sources(texts: List[str]) -> List[str]: + """Extract dataSource.name values from a list of query strings.""" + sources: Set[str] = set() + for text in texts: + for match in _DS_PATTERN.finditer(text): + sources.add(match.group(1).strip()) + return sorted(sources) + + +_SDL_FIELD_PAT = re.compile(r'\$([a-zA-Z][a-zA-Z0-9._]*)(?:=[^$]*)?\$') +_SDL_ATTR_KEY_PAT = re.compile(r'"([a-zA-Z][a-zA-Z0-9._]+)"\s*:') +# Matches both quoted and unquoted output/to keys in rewrites: +# output: "user.name" OR "output": "user.name" +# "to": "src_endpoint.ip" +_SDL_REWRITE_OUT_PAT = re.compile( + r'(?:"output"|output|"to"|"replace")\s*:\s*"([a-zA-Z][a-zA-Z0-9._]+)"' +) + + +def extract_parser_fields_from_content(content: str) -> Set[str]: + """ + Extract output field names from SDL augmented-JSON parser content string. + Handles: + - $field.name$ and $field.name=pattern$ from format strings + - "output": "field.name" and output: "field.name" from rewrites + - quoted attribute keys from attributes{} blocks + """ + fields: Set[str] = set() + + # Fields from format strings: $field.name$ or $field.name=pattern_var$ + for match in _SDL_FIELD_PAT.finditer(content): + field = match.group(1) + # Skip pattern variable names (no dot, short, all lowercase) + if "." in field or field[0].isupper() or len(field) > 6: + fields.add(field) + + # Rewrite output targets: output: "field.name" / "output": "field.name" + _skip_values = {"$0", "1", "2", "3", "4", "99"} + for match in _SDL_REWRITE_OUT_PAT.finditer(content): + val = match.group(1) + if val not in _skip_values and "." in val: + fields.add(val) + + # Quoted attribute keys (skip single-word SDL builtins) + _skip_keys = {"id", "format", "halt", "input", "output", "match", "replace", + "timezone", "attribute", "attributes", "patterns", "formats", + "rewrites", "type", "version"} + for match in _SDL_ATTR_KEY_PAT.finditer(content): + key = match.group(1) + if key not in _skip_keys and ("." in key or len(key) > 8): + fields.add(key) + + return fields + + +_SKIP_FIELD_NAMES = { + "id", "format", "halt", "input", "output", "match", "replace", + "timezone", "attribute", "attributes", "patterns", "formats", + "rewrites", "type", "version", "source", "dataset", "predicate", + "transformations", "mappings", "observables", "fields", "constant", + "copy", "from", "to", "value", "field", "name", +} + + +def _extract_rewrite_fields(rewrites) -> Set[str]: + """Extract 'output' field names from a rewrites list.""" + fields: Set[str] = set() + if not isinstance(rewrites, list): + return fields + for rw in rewrites: + if not isinstance(rw, dict): + continue + # Standard SDL rewrite: {"input": "...", "output": "field.name"} + out = rw.get("output") or rw.get("to") + if out and isinstance(out, str) and "." in out and out not in _SKIP_FIELD_NAMES: + fields.add(out) + return fields + + +def _walk_mappings(node) -> Set[str]: + """Recursively extract copy.to and constant.field from SDL mappings blocks.""" + fields: Set[str] = set() + if isinstance(node, dict): + # transformations copy: {"copy": {"from": "...", "to": "field.name"}} + if "copy" in node and isinstance(node["copy"], dict): + to = node["copy"].get("to") + if to and isinstance(to, str) and "." in to: + fields.add(to) + # transformations constant: {"constant": {"value": ..., "field": "field.name"}} + if "constant" in node and isinstance(node["constant"], dict): + f = node["constant"].get("field") + if f and isinstance(f, str) and "." in f: + fields.add(f) + for v in node.values(): + fields |= _walk_mappings(v) + elif isinstance(node, list): + for item in node: + fields |= _walk_mappings(item) + return fields + + +def extract_parser_fields(parser_json: dict) -> Set[str]: + """ + Extract output field names from an SDL parser JSON dict. + Handles: attributes lists, fields lists, mappings targets, + rewrites[].output, rewrites[].to, copy.to, constant.field. + """ + fields: Set[str] = set() + + # Legacy: attributes as list of {name: ...} + for attr in parser_json.get("attributes", []): + if isinstance(attr, dict) and "name" in attr: + fields.add(attr["name"]) + + # Legacy: fields list + for field in parser_json.get("fields", []): + if isinstance(field, str): + fields.add(field) + elif isinstance(field, dict) and "name" in field: + fields.add(field["name"]) + + # Legacy: flat mappings list with "target" + for mapping in parser_json.get("mappings", []): + if isinstance(mapping, dict) and "target" in mapping: + fields.add(mapping["target"]) + + # SDL rewrites[].output in top-level formats[] + for fmt in parser_json.get("formats", []): + if isinstance(fmt, dict): + fields |= _extract_rewrite_fields(fmt.get("rewrites", [])) + + # SDL mappings block (nested transformations with copy.to / constant.field) + mappings_block = parser_json.get("mappings", {}) + if isinstance(mappings_block, dict): + fields |= _walk_mappings(mappings_block) + + # observables[].name + for obs in parser_json.get("observables", {}).get("fields", []): + if isinstance(obs, dict) and "name" in obs: + n = obs["name"] + if "." in n: + fields.add(n) + + return fields diff --git a/backend/services/s1_client.py b/backend/services/s1_client.py new file mode 100644 index 0000000..66f39f5 --- /dev/null +++ b/backend/services/s1_client.py @@ -0,0 +1,135 @@ +import os +import asyncio +import httpx +from datetime import datetime, timezone + +BASE_URL = os.environ.get("S1_BASE_URL", "https://demo.sentinelone.net").rstrip("/") +TOKEN = os.environ.get("S1_API_TOKEN", "") + +# Scalyr/XDR PowerQuery credentials — from SDL_XDR_URL + SDL_LOG_READ_KEY +# in the SentinelOne console: Settings → Integrations → Data Lake API Keys +SDL_XDR_URL = os.environ.get("SDL_XDR_URL", "https://xdr.us1.sentinelone.net").rstrip("/") +SDL_LOG_READ_KEY = os.environ.get("SDL_LOG_READ_KEY", "") + +# Management Console API uses ApiToken auth +HEADERS = { + "Authorization": f"ApiToken {TOKEN}", + "Content-Type": "application/json", +} + + +def _iso_to_epoch_ms(iso_str: str) -> int: + """Convert ISO-8601 UTC string to epoch milliseconds for Scalyr API.""" + dt = datetime.fromisoformat(iso_str.replace("Z", "+00:00")) + return int(dt.timestamp() * 1000) + + +async def get_star_rules(limit: int = 200) -> list: + """Fetch active STAR rules from the Management Console API.""" + async with httpx.AsyncClient(timeout=30) as client: + resp = await client.get( + f"{BASE_URL}/web/api/v2.1/cloud-detection/rules", + headers=HEADERS, + params={"limit": limit}, + ) + resp.raise_for_status() + return resp.json().get("data", []) + + +async def run_powerquery(query: str, from_date: str, to_date: str) -> dict: + """ + Run a PowerQuery against the Singularity Data Lake via the Scalyr XDR API. + Uses SDL_XDR_URL + SDL_LOG_READ_KEY (Scalyr readlog token). + The Scalyr PowerQuery API is synchronous — results return in one request. + """ + if not SDL_LOG_READ_KEY: + return {"events": [], "error": "SDL_LOG_READ_KEY not configured — add it to .env"} + + start_ms = _iso_to_epoch_ms(from_date) + end_ms = _iso_to_epoch_ms(to_date) + + payload = { + "token": SDL_LOG_READ_KEY, + "query": query, + "startTime": start_ms, + "endTime": end_ms, + "maxCount": 1000, + } + + async with httpx.AsyncClient(timeout=120) as client: + for attempt in range(3): + try: + resp = await client.post( + f"{SDL_XDR_URL}/api/powerQuery", + json=payload, + ) + resp.raise_for_status() + break + except httpx.HTTPStatusError as e: + if e.response.status_code == 429 and attempt < 2: + await asyncio.sleep(10 * (attempt + 1)) + continue + raise RuntimeError( + f"HTTP {e.response.status_code} from {e.request.url}: {e.response.text[:500]}" + ) from e + + data = resp.json() + status = data.get("status", "") + + if status != "success": + # Return full response as error detail for debugging + return {"events": [], "error": f"PowerQuery status={status}: {str(data)[:400]}"} + + # Scalyr PowerQuery returns: {"status":"success","columns":[{"name":"..."},...], "values":[[...],...],...} + raw_cols = data.get("columns", []) + values = data.get("values", []) + + if raw_cols and values: + # columns may be list of strings or list of {"name":...} dicts + col_names = [ + c["name"] if isinstance(c, dict) else c + for c in raw_cols + ] + rows = [dict(zip(col_names, row)) for row in values] + return {"events": rows} + + # Fallback: return raw matches array + matches = data.get("matches", []) + return {"events": matches} + + +async def list_sdl_parsers() -> list[str]: + """List all parser filenames under /logParsers/ in SDL.""" + async with httpx.AsyncClient(timeout=30) as client: + resp = await client.get( + f"{BASE_URL}/api/v1/files/logParsers", + headers=HEADERS, + ) + resp.raise_for_status() + data = resp.json() + # Response is a list of file objects or a dict with 'files' key + if isinstance(data, list): + return [f.get("name") or f.get("path", "") for f in data if isinstance(f, dict)] + return [f.get("name") or f.get("path", "") for f in data.get("files", [])] + + +async def get_sdl_parser(filename: str) -> dict: + """Fetch a single SDL parser file by name.""" + async with httpx.AsyncClient(timeout=30) as client: + resp = await client.get( + f"{BASE_URL}/api/v1/files/logParsers/{filename}", + headers=HEADERS, + ) + resp.raise_for_status() + return resp.json() + + +async def get_sites() -> list: + async with httpx.AsyncClient(timeout=30) as client: + resp = await client.get( + f"{BASE_URL}/web/api/v2.1/sites", + headers=HEADERS, + params={"limit": 100}, + ) + resp.raise_for_status() + return resp.json().get("data", {}).get("sites", []) diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..b9b2cd6 --- /dev/null +++ b/build.sh @@ -0,0 +1,4 @@ +#!/bin/bash +set -e +echo "==> Starting Docker containers..." +docker-compose up --build "$@" diff --git a/db/init.sql b/db/init.sql new file mode 100644 index 0000000..e377c1c --- /dev/null +++ b/db/init.sql @@ -0,0 +1,3 @@ +-- Tables are created by SQLAlchemy on startup. +-- This file exists for the postgres healthcheck mount. +SELECT 1; diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..3c396e0 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,42 @@ +services: + frontend: + build: ./frontend + ports: + - "3001:3000" + depends_on: + - backend + + backend: + build: ./backend + ports: + - "8001:8000" + environment: + - S1_API_TOKEN=${S1_API_TOKEN} + - S1_BASE_URL=${S1_BASE_URL} + - SDL_XDR_URL=${SDL_XDR_URL} + - SDL_LOG_READ_KEY=${SDL_LOG_READ_KEY} + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} + - DATABASE_URL=postgresql://siem:siem@db:5432/siem + depends_on: + db: + condition: service_healthy + volumes: + - ./parsers:/app/parsers + + db: + image: postgres:16-alpine + environment: + - POSTGRES_DB=siem + - POSTGRES_USER=siem + - POSTGRES_PASSWORD=siem + volumes: + - pgdata:/var/lib/postgresql/data + - ./db/init.sql:/docker-entrypoint-initdb.d/init.sql + healthcheck: + test: ["CMD-SHELL", "pg_isready -U siem"] + interval: 5s + timeout: 5s + retries: 5 + +volumes: + pgdata: diff --git a/frontend/.dockerignore b/frontend/.dockerignore new file mode 100644 index 0000000..b90a368 --- /dev/null +++ b/frontend/.dockerignore @@ -0,0 +1,2 @@ +node_modules +.next diff --git a/frontend/Dockerfile b/frontend/Dockerfile new file mode 100644 index 0000000..ef916a9 --- /dev/null +++ b/frontend/Dockerfile @@ -0,0 +1,4 @@ +FROM nginx:alpine +COPY index.html /usr/share/nginx/html/index.html +COPY nginx.conf /etc/nginx/conf.d/default.conf +EXPOSE 3000 diff --git a/frontend/index.html b/frontend/index.html new file mode 100644 index 0000000..696775d --- /dev/null +++ b/frontend/index.html @@ -0,0 +1,421 @@ + + + + + + SIEM Toolkit + + + + + + +
+ + + + diff --git a/frontend/next.config.js b/frontend/next.config.js new file mode 100644 index 0000000..5c113ca --- /dev/null +++ b/frontend/next.config.js @@ -0,0 +1,6 @@ +/** @type {import('next').NextConfig} */ +const nextConfig = { + output: 'export', + trailingSlash: true, +} +module.exports = nextConfig diff --git a/frontend/nginx.conf b/frontend/nginx.conf new file mode 100644 index 0000000..dfaedad --- /dev/null +++ b/frontend/nginx.conf @@ -0,0 +1,9 @@ +server { + listen 3000; + root /usr/share/nginx/html; + index index.html; + + location / { + try_files $uri $uri/ $uri.html /index.html; + } +} diff --git a/frontend/package.json b/frontend/package.json new file mode 100644 index 0000000..ced1333 --- /dev/null +++ b/frontend/package.json @@ -0,0 +1,29 @@ +{ + "name": "siem-toolkit", + "version": "1.0.0", + "private": true, + "scripts": { + "dev": "next dev", + "build": "next build", + "start": "next start" + }, + "dependencies": { + "next": "14.2.5", + "react": "18.3.1", + "react-dom": "18.3.1", + "recharts": "2.12.7", + "@tanstack/react-query": "5.56.2", + "axios": "1.7.7", + "clsx": "2.1.1", + "lucide-react": "0.441.0" + }, + "devDependencies": { + "@types/node": "22.5.4", + "@types/react": "18.3.5", + "@types/react-dom": "18.3.0", + "autoprefixer": "10.4.20", + "postcss": "8.4.45", + "tailwindcss": "3.4.11", + "typescript": "5.6.2" + } +} diff --git a/frontend/postcss.config.js b/frontend/postcss.config.js new file mode 100644 index 0000000..95aa892 --- /dev/null +++ b/frontend/postcss.config.js @@ -0,0 +1,3 @@ +module.exports = { + plugins: { tailwindcss: {}, autoprefixer: {} }, +} diff --git a/frontend/src/app/coverage/page.tsx b/frontend/src/app/coverage/page.tsx new file mode 100644 index 0000000..ef77052 --- /dev/null +++ b/frontend/src/app/coverage/page.tsx @@ -0,0 +1,232 @@ +'use client' + +import { useState, useRef } from 'react' +import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query' +import { api } from '@/lib/api' +import clsx from 'clsx' + +type FieldDetail = { + in_parser: boolean + parser_name: string | null + rule_count: number + rules: { rule: string; type: string }[] + status: 'covered' | 'unused' | 'missing_parser' +} + +type CoverageMap = { + summary: { + total_parser_fields: number + total_rule_fields: number + covered: number + parsed_but_unused: number + rules_missing_parser: number + } + fields: Record +} + +const STATUS_STYLE = { + covered: 'bg-emerald-900/50 text-emerald-300 border-emerald-700', + unused: 'bg-yellow-900/50 text-yellow-300 border-yellow-700', + missing_parser: 'bg-red-900/50 text-red-300 border-red-700', +} + +const STATUS_LABEL = { + covered: 'Covered', + unused: 'Unused (reduce candidate)', + missing_parser: 'Missing parser', +} + +export default function CoveragePage() { + const qc = useQueryClient() + const sigmaRef = useRef(null) + const parserRef = useRef(null) + const [filter, setFilter] = useState<'all' | 'covered' | 'unused' | 'missing_parser'>('all') + const [err, setErr] = useState('') + + const { data, isLoading } = useQuery({ + queryKey: ['coverage-map'], + queryFn: () => api.get('/api/coverage/map'), + }) + + const loadStar = useMutation({ + mutationFn: () => api.post('/api/coverage/load-star-rules', {}), + onSuccess: () => qc.invalidateQueries({ queryKey: ['coverage-map'] }), + onError: (e: Error) => setErr(e.message), + }) + + const uploadSigma = useMutation({ + mutationFn: async (files: FileList) => { + const form = new FormData() + Array.from(files).forEach((f) => form.append('files', f)) + return api.postForm('/api/coverage/upload-sigma', form) + }, + onSuccess: () => qc.invalidateQueries({ queryKey: ['coverage-map'] }), + onError: (e: Error) => setErr(e.message), + }) + + const uploadParser = useMutation({ + mutationFn: async (file: File) => { + const form = new FormData() + form.append('file', file) + return api.postForm('/api/coverage/upload-parser', form) + }, + onSuccess: () => qc.invalidateQueries({ queryKey: ['coverage-map'] }), + onError: (e: Error) => setErr(e.message), + }) + + const reset = useMutation({ + mutationFn: () => api.get('/api/coverage/reset'), + onSuccess: () => qc.invalidateQueries({ queryKey: ['coverage-map'] }), + }) + + const fields = data + ? Object.entries(data.fields).filter( + ([, d]) => filter === 'all' || d.status === filter + ) + : [] + + const busy = loadStar.isPending || uploadSigma.isPending || uploadParser.isPending + + return ( +
+
+
+

Parser Coverage Map

+

+ Cross-reference SDL parser fields against STAR / Sigma rule fields +

+
+
+ + + + +
+
+ + e.target.files && uploadSigma.mutate(e.target.files)} + /> + e.target.files?.[0] && uploadParser.mutate(e.target.files[0])} + /> + + {err && ( +
+ {err} +
+ )} + + {data && ( +
+ {[ + { label: 'Parser Fields', value: data.summary.total_parser_fields, color: 'text-gray-200' }, + { label: 'Rule Fields', value: data.summary.total_rule_fields, color: 'text-gray-200' }, + { label: 'Covered', value: data.summary.covered, color: 'text-emerald-400' }, + { label: 'Parsed Unused', value: data.summary.parsed_but_unused, color: 'text-yellow-400' }, + { label: 'Missing Parser', value: data.summary.rules_missing_parser, color: 'text-red-400' }, + ].map(({ label, value, color }) => ( +
+
{value}
+
{label}
+
+ ))} +
+ )} + +
+ {(['all', 'covered', 'unused', 'missing_parser'] as const).map((f) => ( + + ))} +
+ + {isLoading ? ( +
Loading…
+ ) : fields.length === 0 ? ( +
+ {data ? 'No fields match this filter.' : 'Load STAR rules or upload parsers to begin.'} +
+ ) : ( +
+ + + + + + + + + + + {fields.map(([field, detail]) => ( + + + + + + + ))} + +
FieldStatusParserRules using it
{field} + + {STATUS_LABEL[detail.status]} + + {detail.parser_name ?? '—'} + {detail.rule_count > 0 + ? detail.rules.map((r) => r.rule).join(', ') + : '—'} +
+
+ )} +
+ ) +} diff --git a/frontend/src/app/globals.css b/frontend/src/app/globals.css new file mode 100644 index 0000000..b5c61c9 --- /dev/null +++ b/frontend/src/app/globals.css @@ -0,0 +1,3 @@ +@tailwind base; +@tailwind components; +@tailwind utilities; diff --git a/frontend/src/app/ingest/page.tsx b/frontend/src/app/ingest/page.tsx new file mode 100644 index 0000000..64c0850 --- /dev/null +++ b/frontend/src/app/ingest/page.tsx @@ -0,0 +1,169 @@ +'use client' + +import { useState } from 'react' +import { useQuery, useMutation } from '@tanstack/react-query' +import { + BarChart, Bar, XAxis, YAxis, Tooltip, ResponsiveContainer, CartesianGrid, +} from 'recharts' +import { api } from '@/lib/api' + +type SourceRow = { 'src.name': string; events: number } +type DayRow = { date: string; events: number } + +export default function IngestPage() { + const [days, setDays] = useState(7) + const [simSource, setSimSource] = useState('') + const [simEventType, setSimEventType] = useState('') + const [simResult, setSimResult] = useState | null>(null) + const [simErr, setSimErr] = useState('') + + const sources = useQuery<{ data: SourceRow[] }>({ + queryKey: ['top-sources', days], + queryFn: () => api.get(`/api/ingest/top-sources?days=${days}`), + }) + + const daily = useQuery({ + queryKey: ['daily-volume', days], + queryFn: () => api.get(`/api/ingest/daily-volume?days=${days}`), + }) + + const simulate = useMutation({ + mutationFn: () => + api.post>('/api/ingest/simulate-filter', { + source: simSource, + event_type: simEventType, + days, + gb_per_million_events: 0.5, + }), + onSuccess: (data) => { setSimResult(data); setSimErr('') }, + onError: (e: Error) => setSimErr(e.message), + }) + + const chartData = (sources.data?.data ?? []).slice(0, 15).map((r) => ({ + name: r['src.name'] ?? 'unknown', + events: r.events ?? 0, + })) + + return ( +
+
+
+

Ingest Dashboard

+

Event volume · cost projection · filter simulator

+
+
+ {[7, 14, 30].map((d) => ( + + ))} +
+
+ + {/* Daily volume chart */} +
+

Daily Event Volume

+ {daily.isLoading ? ( +
Loading…
+ ) : ( + + + + + + + + + + )} +
+ + {/* Top sources table */} +
+

Top Sources — last {days}d

+ {sources.isLoading ? ( +
Loading…
+ ) : sources.isError ? ( +
{String(sources.error)}
+ ) : ( + + + + + + + + + + {chartData.map((row) => ( + + + + + + ))} + +
SourceEventsEst. GB
{row.name}{row.events.toLocaleString()} + {(row.events / 1_000_000 * 0.5).toFixed(3)} +
+ )} +
+ + {/* Filter simulator */} +
+

Filter Simulator

+

+ Estimate events and GB eliminated by dropping a source + event type combination. +

+
+ setSimSource(e.target.value)} + placeholder="Source name (optional)" + className="flex-1 min-w-48 bg-gray-800 border border-gray-700 rounded-lg px-3 py-2 text-sm text-gray-200 placeholder-gray-600 focus:outline-none focus:border-purple-600" + /> + setSimEventType(e.target.value)} + placeholder="Event type (optional)" + className="flex-1 min-w-48 bg-gray-800 border border-gray-700 rounded-lg px-3 py-2 text-sm text-gray-200 placeholder-gray-600 focus:outline-none focus:border-purple-600" + /> + +
+ {simErr &&

{simErr}

} + {simResult && ( +
+ {[ + { label: 'Matched Events', value: String(simResult.matched_events ?? 0) }, + { label: `Est. GB (${days}d)`, value: String(simResult.estimated_gb_period ?? 0) }, + { label: 'Projected Monthly Events', value: String(simResult.projected_monthly_events ?? 0) }, + { label: 'Projected Monthly GB', value: String(simResult.projected_monthly_gb ?? 0) }, + ].map(({ label, value }) => ( +
+
{value}
+
{label}
+
+ ))} +
+ )} +
+
+ ) +} diff --git a/frontend/src/app/layout.tsx b/frontend/src/app/layout.tsx new file mode 100644 index 0000000..f0525b9 --- /dev/null +++ b/frontend/src/app/layout.tsx @@ -0,0 +1,22 @@ +import type { Metadata } from 'next' +import './globals.css' +import Sidebar from '@/components/Sidebar' +import QueryProvider from '@/components/QueryProvider' + +export const metadata: Metadata = { + title: 'SIEM Toolkit', + description: 'SentinelOne AI-SIEM Engineering Toolkit', +} + +export default function RootLayout({ children }: { children: React.ReactNode }) { + return ( + + + + +
{children}
+
+ + + ) +} diff --git a/frontend/src/app/onboarding/_CopyButton.tsx b/frontend/src/app/onboarding/_CopyButton.tsx new file mode 100644 index 0000000..617dc54 --- /dev/null +++ b/frontend/src/app/onboarding/_CopyButton.tsx @@ -0,0 +1,21 @@ +'use client' + +import { useState } from 'react' +import { Copy, Check } from 'lucide-react' + +export default function CopyButton({ text }: { text: string }) { + const [copied, setCopied] = useState(false) + return ( + + ) +} diff --git a/frontend/src/app/onboarding/page.tsx b/frontend/src/app/onboarding/page.tsx new file mode 100644 index 0000000..6079526 --- /dev/null +++ b/frontend/src/app/onboarding/page.tsx @@ -0,0 +1,78 @@ +import { Zap, MessageSquare, FileText, Code2 } from 'lucide-react' + +const STEPS = [ + { + icon: FileText, + title: '1. Grab a log sample', + desc: 'Copy 10–50 representative lines from the new log source. Include edge cases — errors, different event types, varying field presence.', + }, + { + icon: MessageSquare, + title: '2. Paste into Claude Code', + desc: 'Open Claude Code and say: "Onboard this log source for SentinelOne SDL" then paste the sample. Mention the source type if known (e.g. "Palo Alto firewall").', + }, + { + icon: Code2, + title: '3. Get your artefacts', + desc: 'Claude returns an SDL parser (augmented-JSON), field mappings to the SDL schema, starter STAR detection rules, and parser test assertions.', + }, + { + icon: Zap, + title: '4. Deploy', + desc: 'Drop the parser JSON into your /logParsers/ path. Paste the STAR rules into the AI-SIEM rule editor. Run the test assertions to validate extraction.', + }, +] + +const PROMPT = `Onboard this log source for SentinelOne SDL. Please generate: +1. An SDL parser skeleton in augmented-JSON format (/logParsers/ format) +2. Field mappings from raw fields to the SDL common schema +3. 2–3 starter STAR detection rules for common threats from this source type +4. 5 parser test assertions (input line → expected field → expected value) + +Log source: [describe source, e.g. "Palo Alto PAN-OS firewall"] + +Raw log sample: +[paste your log lines here]` + +export default function OnboardingPage() { + return ( +
+
+

Onboarding Accelerator

+

+ Use Claude Code directly — no API key required +

+
+ +
+ {STEPS.map(({ icon: Icon, title, desc }) => ( +
+
+ +
+
+
{title}
+
{desc}
+
+
+ ))} +
+ +
+
+ Copy this prompt template + +
+
{PROMPT}
+
+
+ ) +} + +function CopyButton({ text }: { text: string }) { + 'use client' + return <_CopyButton text={text} /> +} + +// Split to keep the page a server component with one small client island +import _CopyButton from './_CopyButton' diff --git a/frontend/src/app/page.tsx b/frontend/src/app/page.tsx new file mode 100644 index 0000000..dde6866 --- /dev/null +++ b/frontend/src/app/page.tsx @@ -0,0 +1,59 @@ +import { Shield, BarChart2, Zap } from 'lucide-react' +import Link from 'next/link' + +const CARDS = [ + { + href: '/coverage', + icon: Shield, + title: 'Parser Coverage Map', + desc: 'Cross-reference SDL parser output fields against STAR and Sigma rule fields. Surface parsed-but-unused fields as reduction candidates.', + cta: 'Open Coverage Map', + color: 'from-purple-700 to-purple-900', + }, + { + href: '/ingest', + icon: BarChart2, + title: 'Ingest Dashboard', + desc: 'Visualize event volume by source and type. Project monthly GB costs and simulate the impact of exclusion filters before applying them.', + cta: 'Open Dashboard', + color: 'from-blue-700 to-blue-900', + }, + { + href: '/onboarding', + icon: Zap, + title: 'Onboarding Accelerator', + desc: 'Step-by-step guide for onboarding a new log source using Claude Code directly — no API key required.', + cta: 'View Onboarding Guide', + color: 'from-emerald-700 to-emerald-900', + }, +] + +export default function Home() { + return ( +
+
+

SIEM Engineering Toolkit

+

SentinelOne AI-SIEM · demo.sentinelone.net

+
+
+ {CARDS.map(({ href, icon: Icon, title, desc, cta, color }) => ( +
+
+ +
+
+

{title}

+

{desc}

+
+ + {cta} → + +
+ ))} +
+
+ ) +} diff --git a/frontend/src/components/QueryProvider.tsx b/frontend/src/components/QueryProvider.tsx new file mode 100644 index 0000000..b744f07 --- /dev/null +++ b/frontend/src/components/QueryProvider.tsx @@ -0,0 +1,9 @@ +'use client' + +import { QueryClient, QueryClientProvider } from '@tanstack/react-query' +import { useState } from 'react' + +export default function QueryProvider({ children }: { children: React.ReactNode }) { + const [client] = useState(() => new QueryClient({ defaultOptions: { queries: { retry: 1 } } })) + return {children} +} diff --git a/frontend/src/components/Sidebar.tsx b/frontend/src/components/Sidebar.tsx new file mode 100644 index 0000000..edbbc57 --- /dev/null +++ b/frontend/src/components/Sidebar.tsx @@ -0,0 +1,45 @@ +'use client' + +import Link from 'next/link' +import { usePathname } from 'next/navigation' +import { Shield, BarChart2, Zap, Home } from 'lucide-react' +import clsx from 'clsx' + +const NAV = [ + { href: '/', label: 'Overview', icon: Home }, + { href: '/coverage', label: 'Parser Coverage', icon: Shield }, + { href: '/ingest', label: 'Ingest Dashboard', icon: BarChart2 }, + { href: '/onboarding', label: 'Onboarding', icon: Zap }, +] + +export default function Sidebar() { + const path = usePathname() + return ( + + ) +} diff --git a/frontend/src/lib/api.ts b/frontend/src/lib/api.ts new file mode 100644 index 0000000..924ee41 --- /dev/null +++ b/frontend/src/lib/api.ts @@ -0,0 +1,22 @@ +const BASE = process.env.NEXT_PUBLIC_API_URL ?? 'http://localhost:8000' + +export async function apiFetch(path: string, init?: RequestInit): Promise { + const res = await fetch(`${BASE}${path}`, init) + if (!res.ok) { + const text = await res.text() + throw new Error(`${res.status}: ${text}`) + } + return res.json() as Promise +} + +export const api = { + get: (path: string) => apiFetch(path), + post: (path: string, body: unknown) => + apiFetch(path, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(body), + }), + postForm: (path: string, form: FormData) => + apiFetch(path, { method: 'POST', body: form }), +} diff --git a/frontend/tailwind.config.js b/frontend/tailwind.config.js new file mode 100644 index 0000000..e4fa2e3 --- /dev/null +++ b/frontend/tailwind.config.js @@ -0,0 +1,12 @@ +/** @type {import('tailwindcss').Config} */ +module.exports = { + content: ['./src/**/*.{ts,tsx}'], + theme: { + extend: { + colors: { + brand: '#7c3aed', + }, + }, + }, + plugins: [], +} diff --git a/frontend/tsconfig.json b/frontend/tsconfig.json new file mode 100644 index 0000000..f0de5f0 --- /dev/null +++ b/frontend/tsconfig.json @@ -0,0 +1,21 @@ +{ + "compilerOptions": { + "target": "es2017", + "lib": ["dom", "dom.iterable", "esnext"], + "allowJs": true, + "skipLibCheck": true, + "strict": true, + "noEmit": true, + "esModuleInterop": true, + "module": "esnext", + "moduleResolution": "bundler", + "resolveJsonModule": true, + "isolatedModules": true, + "jsx": "preserve", + "incremental": true, + "plugins": [{ "name": "next" }], + "paths": { "@/*": ["./src/*"] } + }, + "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], + "exclude": ["node_modules"] +} diff --git a/parsers/.gitkeep b/parsers/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/parsers/aws_cloudtrail-latest b/parsers/aws_cloudtrail-latest new file mode 100644 index 0000000..1f85c8c --- /dev/null +++ b/parsers/aws_cloudtrail-latest @@ -0,0 +1,29 @@ +{ + "attributes": { + "dataSource.vendor": "AWS", + "dataSource.name": "AWS Web Application Firewall", + "dataSource.category": "web_security" + }, + "formats": [ + { + "id": "aws_waf_json", + "format": ".*${parse=json}$", + "rewrites": [ + { "input": "timestamp", "output": "time", "match": ".*", "replace": "$0" }, + { "input": "httpRequest.clientIp", "output": "src_endpoint.ip", "match": ".*", "replace": "$0" }, + { "input": "action", "output": "disposition", "match": ".*", "replace": "$0" }, + { "input": "httpRequest.uri", "output": "http_request.url.text", "match": ".*", "replace": "$0" }, + { "input": "httpRequest.country", "output": "src_endpoint.location.country", "match": ".*", "replace": "$0" }, + { "input": "httpRequest.httpMethod", "output": "http_request.http_method", "match": ".*", "replace": "$0" }, + { "input": "webaclId", "output": "firewall_rule.uid", "match": ".*", "replace": "$0" }, + { "input": "ruleGroupId", "output": "firewall_rule.name", "match": ".*", "replace": "$0" }, + { "input": "terminatingRuleType", "output": "firewall_rule.type", "match": ".*", "replace": "$0" }, + { "input": "httpRequest.httpVersion", "output": "http_request.version", "match": ".*", "replace": "$0" }, + { "input": "httpRequest.args", "output": "http_request.url.query_string", "match": ".*", "replace": "$0" }, + { "input": "requestId", "output": "http_request.uid", "match": ".*", "replace": "$0" }, + { "input": "httpRequest.headers", "output": "http_request.http_headers", "match": ".*", "replace": "$0" } + ], + "halt": true + } + ] +} \ No newline at end of file diff --git a/parsers/cisco_duo-latest b/parsers/cisco_duo-latest new file mode 100644 index 0000000..ad54d60 --- /dev/null +++ b/parsers/cisco_duo-latest @@ -0,0 +1,42 @@ +{ + "attributes": { + "dataSource.vendor": "Cisco", + "dataSource.name": "Cisco Duo Security", + "dataSource.category": "security" + }, + "formats": [ + { + "format": "$unmapped.{parse=json}$", + "rewrites": [ + { "input": "unmapped.timestamp", "output": "timestamp", "match": ".*", "replace": "$0" }, + { "input": "unmapped.time", "output": "time", "match": ".*", "replace": "$0" } + ] + } + ], + "mappings": { + "version": 1, + "mappings": [ + { + "predicate": "unmapped.class_uid = '3002'", + "transformations": [ + { "copy": { "from": "unmapped.activity_id", "to": "activity_id" } }, + { "copy": { "from": "unmapped.activity_name", "to": "activity_name" } }, + { "copy": { "from": "unmapped.type_uid", "to": "type_uid" } }, + { "copy": { "from": "unmapped.severity_id", "to": "severity_id" } }, + { "copy": { "from": "unmapped.status_id", "to": "status_id" } }, + { "copy": { "from": "unmapped.status", "to": "status" } }, + { "copy": { "from": "unmapped.message", "to": "message" } }, + { "copy": { "from": "unmapped.user.name", "to": "user.name" } }, + { "copy": { "from": "unmapped.user.account_uid", "to": "user.account_uid" } }, + { "copy": { "from": "unmapped.user.account_type", "to": "user.account_type" } }, + { "copy": { "from": "unmapped.src_endpoint.ip", "to": "src_endpoint.ip" } }, + { "copy": { "from": "unmapped.src_endpoint.location.desc", "to": "src_endpoint.location.desc" } }, + { "copy": { "from": "unmapped.src_endpoint.location.city", "to": "src_endpoint.location.city" } }, + { "copy": { "from": "unmapped.src_endpoint.location.country", "to": "src_endpoint.location.country" } }, + { "copy": { "from": "unmapped.auth_protocol", "to": "auth_protocol" } }, + { "copy": { "from": "unmapped.mfa_factors", "to": "mfa_factors" } } + ] + } + ] + } +} \ No newline at end of file diff --git a/parsers/crowdstrike_falcon-latest b/parsers/crowdstrike_falcon-latest new file mode 100644 index 0000000..8939b32 --- /dev/null +++ b/parsers/crowdstrike_falcon-latest @@ -0,0 +1,25 @@ +{ + attributes: { + dataset: "Endpoint", + "dataSource.name": "CrowdStrike Falcon", + "dataSource.vendor": "CrowdStrike", + "dataSource.category": "security" + } + patterns: { + keyPattern: "\\w+" + lastValuePattern: "[\\w\\s]+" + }, + formats: [ + { + format: "CEF:$version$\\|$deviceVendor$\\|$deviceProduct$\\|$deviceVersion$\\|$signatureID$\\|$name$\\|$severity$\\|$extension$" + }, + { + format: ".*[\\s]$_=keyPattern$=$_$ \\w+=", + repeat: true + }, + { + format: ".*\\s$_=keyPattern$=$_=lastValuePattern$", + repeat: true + } + ] +} \ No newline at end of file diff --git a/parsers/microsoft_azuread-latest b/parsers/microsoft_azuread-latest new file mode 100644 index 0000000..b252910 --- /dev/null +++ b/parsers/microsoft_azuread-latest @@ -0,0 +1,33 @@ +{ + attributes: { + "dataSource.category": "security", + "dataSource.name": "Azure AD", + "dataSource.vendor": "Azure" + }, + formats: [ + { + format: ".*${parse=json}{attrBlacklist=(targetResources)}$" + rewrites: [ + { input: "activityDateTime", output: "security_finding.time_dt", match: ".*", replace: "$0" }, + { input: "activityDisplayName", output: "security_finding.activity_name", match: ".*", replace: "$0" }, + { input: "category", output: "security_finding.category_name", match: ".*", replace: "$0" }, + { input: "correlationId", output: "metadata.correlation_uid", match: ".*", replace: "$0" }, + { input: "id", output: "security_finding.activity_id", match: ".*", replace: "$0" }, + { input: "initiatedByUserId", output: "user.account_uid", match: ".*", replace: "$0" }, + { input: "initiatedByUserIpAddress", output: "user.ip", match: ".*", replace: "$0" }, + { input: "initiatedByUserUserPrincipalName", output: "user.name", match: ".*", replace: "$0" }, + { input: "operationType", output: "security_finding.type_name", match: ".*", replace: "$0" }, + { input: "result", output: "security_finding.result", match: ".*", replace: "$0" }, + { input: "resultReason", output: "security_finding.result_reason", match: ".*", replace: "$0" } + ] + }, { + format: ".*targetResources\":..$targetResources.{parse=json}$" + rewrites: [ + { input: "targetResources.displayName", output: "target.name", match: ".*", replace: "$0" }, + { input: "targetResources.id", output: "target.id", match: ".*", replace: "$0" }, + { input: "targetResources.type", output: "target.type", match: ".*", replace: "$0" }, + { input: "targetResources.userPrincipalName", output: "target.userName", match: ".*", replace: "$0" } + ] + } + ] +} \ No newline at end of file diff --git a/parsers/netskope_netskope_logs-latest b/parsers/netskope_netskope_logs-latest new file mode 100644 index 0000000..d7d8d4a --- /dev/null +++ b/parsers/netskope_netskope_logs-latest @@ -0,0 +1,46 @@ +{ + attributes: { + "dataSource.category": "security", + "dataSource.name": "Netskope", + "dataSource.vendor": "Netskope" + }, + formats: [ + { + format: ".*${parse=json}$" + rewrites: [ + { input: "_category_id", output: "security_finding.category_uid", match: ".*", replace: "$0" }, + { input: "_correlation_id", output: "metadata.correlation_uid", match: ".*", replace: "$0" }, + { input: "_detection_name", output: "detection.name", match: ".*", replace: "$0" }, + { input: "_event_id", output: "security_finding.activity_id", match: ".*", replace: "$0" }, + { input: "_id", output: "security_finding.type_uid", match: ".*", replace: "$0" }, + { input: "_nshostname", output: "network_endpoint.sender_hostname", match: ".*", replace: "$0" }, + { input: "_resource_name", output: "resource.name", match: ".*", replace: "$0" }, + { input: "account_name", output: "account.name", match: ".*", replace: "$0" }, + { input: "action", output: "security_finding.action", match: ".*", replace: "$0" }, + { input: "alert_id", output: "alert.uid", match: ".*", replace: "$0" }, + { input: "alert_name", output: "alert.name", match: ".*", replace: "$0" }, + { input: "alert_type", output: "event.type", match: ".*", replace: "$0" }, + { input: "device", output: "device.name", match: ".*", replace: "$0" }, + { input: "dlp_file", output: "dlp.file_name", match: ".*", replace: "$0" }, + { input: "dlp_incident_id", output: "dlp.incident_id", match: ".*", replace: "$0" }, + { input: "dlp_rule", output: "dlp.rule", match: ".*", replace: "$0" }, + { input: "dstip", output: "dst.ip.address", match: ".*", replace: "$0" }, + { input: "file_name", output: "file.name", match: ".*", replace: "$0" }, + { input: "file_size", output: "file.size", match: ".*", replace: "$0" }, + { input: "file_type", output: "file.type", match: ".*", replace: "$0" }, + { input: "hostname", output: "device.hostname", match: ".*", replace: "$0" }, + { input: "malware_name", output: "malware.name", match: ".*", replace: "$0" }, + { input: "md5", output: "file.md5", match: ".*", replace: "$0" }, + { input: "os", output: "os.name", match: ".*", replace: "$0" }, + { input: "policy", output: "policy.name", match: ".*", replace: "$0" }, + { input: "policy_id", output: "policy.uid", match: ".*", replace: "$0" }, + { input: "protocol", output: "network_connection_info.protocol_name", match: ".*", replace: "$0" }, + { input: "srcip", output: "src.ip.address", match: ".*", replace: "$0" }, + { input: "url", output: "url.text", match: ".*", replace: "$0" }, + { input: "user", output: "user.name", match: ".*", replace: "$0" }, + { input: "user_id", output: "user.uid", match: ".*", replace: "$0" }, + { input: "userip", output: "user.ip", match: ".*", replace: "$0" } + ] + } + ] +} \ No newline at end of file diff --git a/parsers/okta_authentication-latest b/parsers/okta_authentication-latest new file mode 100644 index 0000000..41b15cb --- /dev/null +++ b/parsers/okta_authentication-latest @@ -0,0 +1,39 @@ +{ + attributes: { + source: "okta" + "dataSource.category": "security", + "dataSource.name": "Okta", + "dataSource.vendor": "Okta", + }, + formats: [ + { + format: ".*${parse=dottedJson}{attrBlacklist=target}$" + rewrites: [ + { input: "actor.id", output: "user.account_uid", match: ".*", replace: "$0" }, + { input: "actor.type", output: "user.account_type", match: ".*", replace: "$0" }, + { input: "actor.alternateId", output: "user.email_addr", match: ".*", replace: "$0" }, + { input: "actor.displayName", output: "user.name", match: ".*", replace: "$0" }, + { input: "authenticationContext.authenticationStep", output: "authenticationStep", match: ".*", replace: "$0" }, + { input: "authenticationContext.externalSessionId", output: "externalSessionId", match: ".*", replace: "$0" }, + { input: "client.ipAddress", output: "client.ip", match: ".*", replace: "$0" }, + { input: "client.userAgent.browser", output: "client.browser", match: ".*", replace: "$0" }, + { input: "client.userAgent.os", output: "client.os", match: ".*", replace: "$0" }, + { input: "client.userAgent.rawUserAgent", output: "client.userAgent", match: ".*", replace: "$0" }, + { input: "client.zone", output: "client.location.zone", match: ".*", replace: "$0" }, + { input: "client.geographicalContext.city", output: "client.location.city", match: ".*", replace: "$0" }, + { input: "client.geographicalContext.country", output: "client.location.country", match: ".*", replace: "$0" }, + { input: "client.geographicalContext.geolocation.lat", output: "client.location.lat", match: ".*", replace: "$0" }, + { input: "client.geographicalContext.geolocation.lon", output: "client.location.lon", match: ".*", replace: "$0" }, + { input: "client.geographicalContext.postalCode", output: "client.location.postal_code", match: ".*", replace: "$0" }, + { input: "client.geographicalContext.state", output: "client.location.state", match: ".*", replace: "$0" }, + { input: "displayMessage", output: "msg", match: ".*", replace: "$0" }, + { input: "eventType", output: "category_name", match: ".*", replace: "$0" }, + { input: "outcome.result", output: "result", match: ".*", replace: "$0" }, + { input: "published", output: "time", match: ".*", replace: "$0" }, + { input: "transaction.id", output: "type_uid", match: ".*", replace: "$0" }, + { input: "transaction.type", output: "type_name", match: ".*", replace: "$0" }, + { input: "uuid", output: "activity_id", match: ".*", replace: "$0" } + ] + } + ] +} \ No newline at end of file diff --git a/parsers/palo b/parsers/palo new file mode 100644 index 0000000..60b5c1f --- /dev/null +++ b/parsers/palo @@ -0,0 +1,14 @@ +{ + attributes: { + "dataSource.category": "security", + "dataSource.name": "Palo Alto Networks", + "dataSource.vendor": "Palo Alto Networks" + } + formats: [ + { + id: "traffic-11-0", + format: "$network_activity.future_use_1$,$network_activity.receive_time$,$firewall.serial_number$,$network_activity.sub_type$,$timestamp$,$src.ip.address$,$dst.ip.address$,$network_endpoint.nat_src_ip$,$network_endpoint.nat_dst_ip$,$rule.name$,$user.src_name$,$user.dst_name$,$network_activity.app_name$,$network_traffic.virtual_system_name$,$source_zone$,$destination_zone$,$network_interface.inbound_name$,$network_interface.outbound_name$,$network_activity.log_action$,$session.uid$,$network_activity.repeat_count$,$network_endpoint.src_port$,$network_endpoint.dst_port$,$network_connection_info.flag$,$network_connection_info.protocol_name$,$network_activity.action$,$network_traffic.bytes$,$network_traffic.bytes_out$,$network_traffic.bytes_in$,$network_traffic.packets$,$network_activity.start_time_dt$,$network_activity.elapsed_time$,$network_activity.category_name$,$network_activity.sequence_number$,$network_activity.action_flags$,$location.src_country$,$location.dst_country$,$network_traffic.packets_out$,$network_traffic.packets_in$,$session.expiration_reason$,$device.group_hierarchy.level_1$,$device.group_hierarchy.level_2$,$device.group_hierarchy.level_3$,$device.group_hierarchy.level_4$,$firewall.virtual_system_name$,$device.name$,$network_activity.action_source$,$virtual_machine.src_vm_uuid$,$virtual_machine.dst_vm_uuid$,$device.imsi$,$device.imei$,$session.parent_uid$,$network_activity.parent_start_time_dt$,$network_connection_info.tunnel_type$,$network_connection_info.sctp_id$,$network_connection_info.sctp_chunks$,$network_connection_info.sctp_chunks_out$,$network_connection_info.sctp_chunks_in$,$rule.uid$,$network_activity.http_connection$,$network_connection_info.app_flap_count$,$policy.uid$,$network_connection_info.link_switches$,$network_connection_info.sd_wan_cluster$,$network_connection_info.sd_wan_device_type$,$network_connection_info.sd_wan_cluster_type$,$network_connection_info.sd_wan_site$,$user.groups$,$http_request.x_forwarded_for$,$device.src_type$,$device.src_profile$,$device.src_model$,$device.src_vendor_name$,$device.src_os_edition$,$device.src_os_version$,$network_connection_info.src_hostname$,$device.src_mac$,$device.dst_type$,$device.dst_profile$,$device.dst_model$,$device.dst_vendor_name$,$network_connection_info.dst_hostname$,$network_connection_info.dst_mac$,$container.id$,$container.pod_namespace$,$container.pod_name$,$network_endpoint.src_host_list$,$network_endpoint.dst_host_list$,$network_endpoint.host_id$,$device_hardware_info.serial_number$,$policy.src_group$,$policy.dst_group$,$session.owner$,$network_activity.time$,$network_activity.a_slice.service_type$,$network_activity.a_slice.differentiator$,$network_activity.sub_category$,$network_activity.app_model$,$network_activity.severity$,$network_activity.container.id$,$network_activity.app_tunnel_type$,$network_activity.is_saas$,$network_activity.is_sanctioned$,$network_activity.is_offloaded$,$network_activity.flow_type$,$network_activity.cluster.name$", + halt: true + } + ] +} \ No newline at end of file diff --git a/parsers/paloalto b/parsers/paloalto new file mode 100644 index 0000000..fa4ebec --- /dev/null +++ b/parsers/paloalto @@ -0,0 +1,13 @@ +{ + attributes: { + "dataSource.vendor": "Palo Alto Networks", + "dataSource.name": "Palo Alto Networks Prisma SASE", + "dataSource.category": "security", + } + formats: [ + { + format: "$network_traffic.log_header$,$network_traffic.log_source_uid$,$event.type$,$network_traffic.sub_type_value$,$network_traffic.config_version_value$,$network_activity.time$,$src.ip.address$,$dst.ip.address$,$nat_src.device_ip$,$nat_dst.device_ip$,$rule.name$,$user.src_name$,$user.dst_name$,$network_traffic.app_name$,$network_traffic.virtual_system_location$,$network_traffic.from_zone$,$network_traffic.to_zone$,$network_traffic.inbound_if_value$,$network_traffic.outbound_if_value$,$network_traffic.log_set$,$session.uid$,$network_traffic.repeat_count$,$network_endpoint.src_port$,$network_endpoint.dst_port$,$nat_src.port$,$nat_dst.port$,$network_traffic.flags$,$network_connection_info.protocol_name$,$network_traffic.bytes$,$network_traffic.bytes_out$,$network_traffic.bytes_in$,$network_traffic.packets$,$session.created_time$,$network_traffic.total_time_elapsed$,$url.categories$,$metadata.sequence$,$network_traffic.action_flags$,$location.src_region$,$location.dst_region$,$network_traffic.packets_out$,$network_traffic.packets_in$,$network_traffic.session_end_reason_value$,$network_traffic.dg_hier_level_1$,$network_traffic.dg_hier_level_2$,$network_traffic.dg_hier_level_3$,$network_traffic.dg_hier_level_4$,$network_traffic.virtual_system_name$,$endpoint.name$,$network_traffic.action_source_value$,$source.uuid$,$destination.uuid$,$network_traffic.tunnel_id_imsi$,$network_traffic.monitor_tag_imei$,$session.parent_id$,$session.parent_start_time$,$network_traffic.tunnel_value$,$network_traffic.ep_association_uid$,$network_traffic.chunks$,$network_traffic.chunks_out$,$network_traffic.chunks_in$,$rule.uid$,$network_traffic.http2_connection$,$network_traffic.link_change_count$,$policy.uid$,$network_traffic.link_switches$,$network_traffic.sdwan_cluster$,$network_traffic.sdwan_device_type$,$network_traffic.sdwan_cluster_type$,$network_traffic.sdwan_site$,$network_traffic.dynusergroup_name$,$http_request.x_forwarded_for$,$source_device.category_name$,$source_device.profile$,$source_device.model$,$source_device.vendor_name$,$source_device.os_name$,$source_device.os_version$,$source_device.hostname$,$source_device.mac$,$destination_device.category_name$,$destination_device.profile$,$destination_device.model$,$destination_device.vendor_name$,$destination_device.os_name$,$destination_device.os_version$,$destination_device.hostname$,$destination_device.mac$,$container.uid$,$network_traffic.pod_namespace$,$network_traffic.pod_name$,$network_traffic.source_edl$,$network_traffic.destination_edl$,$host.uid$,$endpoint.serial_number$,$network_traffic.source_dynamic_address_group$,$network_traffic.destination_dynamic_address_group$,$network_traffic.ha_session_owner$,$network_traffic.timestamp_generated_high_res$,$network_traffic.nssai_network_slice_type_value$,$network_traffic.nssai_network_slice_differentiator_value$", + halt: true, + } + ] +} \ No newline at end of file