NOX Framework — {

#!/usr/bin/env python3 """ NOX — Cyber Threat Intelligence Framework Async core | 120+ breach sources | Risk scoring | Identity graphing | HVT detection """ import asyncio import hashlib import html as html_module import json import sys as _sys # ── Global namespace injection — location-agnostic path anchor ───────── # Resolves the package root whether NOX is run from /usr/bin, /home, or /tmp. # Canonical install: /usr/lib/python3/dist-packages/nox/nox.py # Dev/source run: /nox.py import pathlib as _pl _SCRIPT_DIR = _pl.Path(__file__).resolve().parent _INSTALL_PKG = _pl.Path("/usr/lib/python3/dist-packages/nox") _PKG_ROOT = _INSTALL_PKG if _SCRIPT_DIR == _INSTALL_PKG else _SCRIPT_DIR if str(_PKG_ROOT) not in _sys.path: _sys.path.insert(0, str(_PKG_ROOT)) # ── Credential helper (XDG JSON store) ──────────────────────────────── try: from sources.helpers.config_handler import ( # type: ignore ConfigManager as _ExtConfigManager, UNIVERSAL_PLACEHOLDER, SERVICE_REGISTRY, ) _HAS_CONFIG_HANDLER = True except ImportError: _HAS_CONFIG_HANDLER = False UNIVERSAL_PLACEHOLDER = "INSERT_API_KEY_HERE" SERVICE_REGISTRY = {} _ExtConfigManager = None try: from sources.helpers.cracker import detect_hash # type: ignore _HAS_CRACKER = True except ImportError: _HAS_CRACKER = False def detect_hash(v): # type: ignore return None try: from sources.helpers.scanner import AvalancheScanner # type: ignore _HAS_AVALANCHE = True except ImportError: _HAS_AVALANCHE = False AvalancheScanner = None # type: ignore try: from sources.helpers.reporting import ( # type: ignore to_json as _rep_json, to_html as _rep_html, to_pdf as _rep_pdf, ) _HAS_REPORTING = True except ImportError: _HAS_REPORTING = False import os import random import re import sys import time import threading _PROXY_ENV_LOCK = threading.Lock() import argparse import csv import logging import math import tempfile import urllib.parse import urllib.request import urllib.error import http.cookiejar import gzip import ssl import base64 from abc import ABC, abstractmethod from contextlib import contextmanager aiosqlite = None try: import aiosqlite as _aiosqlite aiosqlite = _aiosqlite except ImportError: pass import sqlite3 as _sqlite3_fallback from dataclasses import dataclass, field, asdict from datetime import datetime, timezone from enum import Enum, auto from pathlib import Path from typing import Dict, List, Optional, Set, Any, Tuple OPTIONAL: Dict[str, Any] = {} def _try_import(name: str, pkg: str = None): try: m = __import__(pkg or name) OPTIONAL[name] = m return m except ImportError: return None aiohttp_mod = _try_import("aiohttp") bs4 = _try_import("bs4", "bs4") BeautifulSoup = getattr(bs4, "BeautifulSoup", None) if bs4 else None cloudscraper = _try_import("cloudscraper") stem = _try_import("stem") colorama = _try_import("colorama") rich_mod = _try_import("rich") phonenumbers = _try_import("phonenumbers") requests = _try_import("requests") try: from weasyprint import HTML as _WP_HTML weasyprint = _WP_HTML except ImportError: weasyprint = None if colorama: colorama.init(autoreset=True) try: from importlib.metadata import version as _pkg_version VERSION = _pkg_version("nox-cli") except Exception: # Fallback: read directly from pyproject.toml (dev/source run) try: import tomllib as _toml # Python 3.11+ except ImportError: try: import tomli as _toml # type: ignore except ImportError: _toml = None # type: ignore if _toml: try: with open(_pl.Path(__file__).resolve().parent / "pyproject.toml", "rb") as _f: VERSION = _toml.load(_f)["project"]["version"] except Exception: VERSION = "1.0.0" else: VERSION = "1.0.0" if VERSION == "1.0.0": try: import subprocess as _sp2 VERSION = _sp2.check_output(["dpkg-query", "-W", "-f=${Version}", "nox-cli"], stderr=_sp2.DEVNULL).decode().strip() or VERSION except Exception: pass BUILD_DATE = "2026-04-14" # ── Smart Path Layout ────────────────────────────────────────────────── HOME_NOX = Path.home() / ".nox" LOG_DIR = HOME_NOX / "logs" REPORT_DIR = HOME_NOX / "reports" SOURCE_DIR = HOME_NOX / "sources" VAULT_DIR = HOME_NOX / "vault" # XDG config dir — canonical location for apikeys, system log _XDG_CFG = Path(os.environ.get("XDG_CONFIG_HOME", Path.home() / ".config")) / "nox-cli" SYSLOG_DIR = _XDG_CFG / "logs" def initialize_environment() -> None: """ Create ~/.nox directory tree, seed sources from the script location or /usr/share/nox-cli/sources/ if the user sources dir is empty, and fix ownership when the directory was previously created by root (sudo). Creates a default config.ini on first run if not present. """ import shutil # Create all required directories PROVIDER_DIR = HOME_NOX / "providers" for d in (HOME_NOX, LOG_DIR, REPORT_DIR, SOURCE_DIR, VAULT_DIR, PROVIDER_DIR): d.mkdir(mode=0o755, parents=True, exist_ok=True) # Ownership fix: if run as root previously, re-own to the real user real_uid = int(os.environ.get("SUDO_UID", os.getuid())) real_gid = int(os.environ.get("SUDO_GID", os.getgid())) if os.getuid() == 0 and real_uid != 0: for d in (HOME_NOX, LOG_DIR, REPORT_DIR, SOURCE_DIR, VAULT_DIR): try: os.chown(d, real_uid, real_gid) except OSError: pass # Create default config.ini on first run _default_cfg = HOME_NOX / "config.ini" if not _default_cfg.exists(): import configparser as _cp cfg = _cp.ConfigParser() cfg["settings"] = { "concurrency": "20", "timeout": "30", "stealth": "true", "rate_limit_lo": "0.5", "rate_limit_hi": "2.0", } cfg["api_keys"] = {} with open(_default_cfg, "w") as fh: cfg.write(fh) # Smart source discovery: seed ~/.nox/sources/ from package sources/ # Only copies files that are absent — never overwrites user-customised sources. # Use --reset-sources to force a full resync. candidate = _PKG_ROOT / "sources" if not candidate.is_dir(): candidate = Path("/usr/share/nox-cli/sources") if candidate.is_dir(): for jf in candidate.glob("*.json"): dst = SOURCE_DIR / jf.name try: if not dst.exists(): shutil.copy2(jf, dst) except OSError: pass # ── Static Configuration ─────────────────────────────────────────────── class Cfg: TIMEOUT = 30 RETRIES = 3 RETRY_DELAY = 2 CONCURRENCY = 20 RATE_LIMIT = (0.5, 2.0) TOR_SOCKS = 9050 TOR_CTRL = 9051 TOR_PASS = "" STEALTH = True BASE = HOME_NOX DB = HOME_NOX / "nox_cache.db" REPORTS = REPORT_DIR LOGS = LOG_DIR WORDLISTS = HOME_NOX / "wordlists" CACHE_TTL = 86400 DORK_MAX = 50 DORK_DELAY = (0.5, 2.0) PASTE_MAX = 100 PASTE_DELAY = (1.0, 3.0) PIVOT_DEPTH = 2 PIVOT_CONFIDENCE = 0.70 # Browser-grade TLS cipher suite for JA3 fingerprint matching TLS_CIPHERS = ( "TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256:" "ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:" "ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:" "ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:" "ECDHE-RSA-AES128-SHA:ECDHE-RSA-AES256-SHA:" "AES128-GCM-SHA256:AES256-GCM-SHA384:AES128-SHA:AES256-SHA" ) @classmethod def init(cls) -> None: for d in [cls.REPORTS, cls.LOGS, cls.WORDLISTS]: d.mkdir(parents=True, exist_ok=True) cls.BASE.mkdir(parents=True, exist_ok=True) Cfg.init() # ── Runtime Configuration ────────────────────────────────────────────── class NoxConfig: def __init__(self) -> None: self.use_tor = False self.proxy = None self.concurrency = Cfg.CONCURRENCY self.timeout = Cfg.TIMEOUT self.stealth = Cfg.STEALTH self.rate_limit = Cfg.RATE_LIMIT self.tor_socks = Cfg.TOR_SOCKS self.tor_ctrl = Cfg.TOR_CTRL self.tor_pass = Cfg.TOR_PASS self.allow_leak = False self.no_online_crack = False self.max_threads = Cfg.CONCURRENCY self.no_pivot = False self.pivot_depth = Cfg.PIVOT_DEPTH # ── Logging ──────────────────────────────────────────────────────────── LOG_DIR.mkdir(parents=True, exist_ok=True) SYSLOG_DIR.mkdir(parents=True, exist_ok=True) logger = logging.getLogger("nox") if not logger.handlers: logger.setLevel(logging.DEBUG) logger.propagate = False _fh = logging.FileHandler(str(LOG_DIR / "nox.log")) _fh.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")) _fh.setLevel(logging.DEBUG) logger.addHandler(_fh) # Terminal: WARNING and above only — no debug/info noise _sh = logging.StreamHandler() _sh.setLevel(logging.WARNING) _sh.setFormatter(logging.Formatter("[%(levelname)s] %(message)s")) logger.addHandler(_sh) # ── System event log: API status, rate-limits, crack attempts ───────── # Writes to ~/.config/nox-cli/logs/nox_system.log — never to terminal _syslog = logging.getLogger("nox.system") if not _syslog.handlers: _syslog.setLevel(logging.INFO) _sfh = logging.FileHandler(str(SYSLOG_DIR / "nox_system.log")) _sfh.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")) _syslog.addHandler(_sfh) _syslog.propagate = False # ── Colors / Console ─────────────────────────────────────────────────── class C: R = "\033[91m"; G = "\033[92m"; Y = "\033[93m"; B = "\033[94m" P = "\033[95m"; CY = "\033[96m"; W = "\033[97m"; GR = "\033[90m" O = "\033[38;5;208m"; BD = "\033[1m"; DM = "\033[2m"; X = "\033[0m" @staticmethod def c(t: str, color: str = "W") -> str: m = { "red": C.R, "green": C.G, "yellow": C.Y, "blue": C.B, "purple": C.P, "cyan": C.CY, "white": C.W, "gray": C.GR, "orange": C.O, "bold": C.BD, "dim": C.DM, } return f"{m.get(color, C.W)}{t}{C.X}" class Console: ICONS = { "breach": f"{C.R}[!]{C.X}", "pass": f"{C.Y}[*]{C.X}", "hash": f"{C.P}[#]{C.X}", "net": f"{C.B}[~]{C.X}", "stealth": f"{C.GR}[^]{C.X}", "ok": f"{C.G}[+]{C.X}", "err": f"{C.R}[-]{C.X}", "warn": f"{C.Y}[!]{C.X}", "info": f"{C.CY}[i]{C.X}", "db": f"{C.B}[D]{C.X}", "report": f"{C.G}[R]{C.X}", "dork": f"{C.O}[G]{C.X}", "paste": f"{C.P}[P]{C.X}", "scrape": f"{C.B}[S]{C.X}", "combo": f"{C.R}[C]{C.X}", "pivot": f"{C.CY}[↻]{C.X}", } @staticmethod def s(msg: str, icon: str = "info") -> None: print(f" {Console.ICONS.get(icon, Console.ICONS['info'])} {msg}") @staticmethod def ok(msg: str) -> None: Console.s(msg, "ok") @staticmethod def err(msg: str) -> None: Console.s(msg, "err") @staticmethod def warn(msg: str) -> None: Console.s(msg, "warn") @staticmethod def dim(msg: str) -> None: pass # file logging handled by out() @staticmethod def section(title: str) -> None: print(f"\n {C.c('='*58,'purple')}\n {C.c(f' {title}','bold')}\n {C.c('='*58,'purple')}") @staticmethod def table(headers: List[str], rows: List[List], title: str = None) -> None: if title: print(f"\n {C.c(title,'bold')}") if not rows: print(f" {C.c('(empty)','gray')}") return widths = [ max(len(str(h)), max((len(str(r[i])) for r in rows), default=0)) for i, h in enumerate(headers) ] hdr = " | ".join(C.c(str(h).ljust(widths[i]), "cyan") for i, h in enumerate(headers)) print(f" {hdr}\n {'-+-'.join('-'*w for w in widths)}") for row in rows: print(f" {' | '.join(str(row[i]).ljust(widths[i]) for i in range(len(headers)))}") @staticmethod def progress(cur: int, tot: int, prefix: str = "Progress", w: int = 30) -> None: if tot == 0: return p = cur / tot f = int(w * p) bar = C.c("█" * f, "green") + C.c("░" * (w - f), "gray") print(f"\r {prefix} [{bar}] {C.c(f'{p:.0%}','cyan')} ({cur}/{tot})", end="", flush=True) if cur >= tot: print() _ANSI_RE = re.compile(r"\x1b\[[0-9;]*m") def out(level: str, msg: str) -> None: fn = getattr(Console, level, None) if fn: fn(msg) else: Console.s(msg) # Mirror every terminal message to the log file so users can audit the full run. clean = _ANSI_RE.sub("", msg) if level in ("err",): logger.error("[%s] %s", level, clean) elif level in ("warn",): logger.warning("[%s] %s", level, clean) elif level in ("ok", "info", "pivot", "breach", "scrape", "dork", "paste"): logger.info("[%s] %s", level, clean) else: logger.debug("[%s] %s", level, clean) # ── Data Models ──────────────────────────────────────────────────────── class Severity(Enum): CRITICAL = auto() HIGH = auto() MEDIUM = auto() LOW = auto() INFO = auto() # ── Intelligence constants ───────────────────────────────────────────── _SRC_CONFIDENCE: Dict[str, float] = { "HIBP": 1.0, "HudsonRock": 0.95, "SpyCloud": 0.92, "RecordedFuture": 0.90, "Dehashed": 0.88, "WhiteIntel": 0.88, "CyberSixGill": 0.87, "FlareIO": 0.85, "DarkTracer": 0.85, "IntelX": 0.83, "SOCRadar": 0.82, "LeakCheck": 0.80, "BreachSense": 0.80, "DataViper": 0.78, "Snusbase": 0.75, "WeLeakInfo": 0.75, "LeakLookup": 0.72, "LeakLookupV2": 0.72, "BulkLeakLookup": 0.70, "Scylla": 0.68, "DeepSearch": 0.65, "BreachDirectory": 0.65, "LeakPeek": 0.65, "LeakSearch": 0.63, "CheckLeaked": 0.62, "Antipublic": 0.60, "GhostProject": 0.60, "LeakedSite": 0.58, "LeakedPassword": 0.58, "NuclearLeaks": 0.55, "ProxyNovaCOMB": 0.55, "CredStuffDB": 0.55, "ComboList": 0.55, "PwnDB": 0.52, "LeakOSINT": 0.52, "Pentester": 0.50, "HunterIO": 0.70, "FullContact": 0.68, "PeopleDataLabs": 0.68, "ZeroBounce": 0.65, "RocketReach": 0.62, "Gravatar": 0.45, "EmailRep": 0.55, "Holehe": 0.50, "NameCheck": 0.45, "FirefoxMonitor": 0.60, "AvastHackCheck": 0.55, "Inoitsu": 0.50, "BreachAlarm": 0.50, "HaveIBeenSold": 0.55, "CyberNews": 0.55, "XposedOrNot": 0.60, "AshleyMadison": 0.70, "Shodan": 0.80, "Censys": 0.78, "BinaryEdge": 0.75, "SecurityTrails": 0.75, "FullHunt": 0.72, "Netlas": 0.70, "ZoomEye": 0.70, "Onyphe": 0.68, "VirusTotal": 0.85, "AlienVaultOTX": 0.80, "Pulsedive": 0.72, "ThreatCrowd": 0.65, "Maltiverse": 0.65, "PassiveTotal": 0.75, "AbuseIPDB": 0.78, "GreyNoise": 0.75, "MXToolbox": 0.65, "WhoisXML": 0.60, "URLScan": 0.65, "ExploitDB": 0.70, "ThreatBook": 0.68, "Huntress": 0.72, "StealerLogSearch": 0.90, "IntelXPhone": 0.80, "IntelFinder": 0.75, "BreachForumsIntel": 0.60, "RaidForumsArchive": 0.55, "OGUsers": 0.50, "Cracked.to": 0.55, "Nulled.to": 0.55, "DarkWebTor": 0.50, "WikiLeaks": 0.75, "RansomWatch": 0.85, "DataBreaches.net": 0.55, "PastebinIntel": 0.35, "PasteHunter": 0.35, "ScrapeEngine": 0.30, "TelegramOSINT": 0.30, "GoogleDork": 0.30, "SynapsInt": 0.40, "WaybackMachine": 0.40, "BuiltWith": 0.40, "CertStream": 0.45, "GitLeaks": 0.65, "SPF/DMARC": 0.40, "Picostatus": 0.30, "LeakedDomains": 0.60, "Leakix": 0.72, "PhoneInfo": 0.55, "Numverify": 0.60, "TrueCaller": 0.65, "Hashmob": 0.95, "HashKiller": 0.90, "HashesOrg": 0.90, "LeakLookupHash": 0.80, } _STEALER_TAGS = {"stealer", "redline", "raccoon", "vidar", "infostealer", "lumma", "azorult", "stealc"} _FAST_HASHES = {"md5", "sha1", "sha256", "ntlm", "lm"} _CORP_PW_RE = re.compile(r"(?i)([A-Z][a-z]{2,})(20\d{2}|19\d{2})[!@#$%^&*]?$") _VIP_EMAIL_RE = re.compile(r"(?i)(admin|administrator|root|ceo|cto|ciso|cfo|vp|director|manager|sysadmin|devops|security|infosec|noc|soc)") _VIP_DOM_RE = re.compile(r"\.(gov|mil|edu|police|gouv|gob)(\.[a-z]{2})?$", re.I) _HVT_KEYWORDS = frozenset({ "admin", "administrator", "root", "ceo", "cto", "ciso", "cfo", "vp", "director", "manager", "sysadmin", "devops", "security", "infosec", "noc", "soc", "superuser", "sa", "dba", "ops", }) _HVT_DOMAINS = re.compile( r"\.(gov|mil|int|police|gouv|gob|gc\.ca|gov\.uk|mod\.uk)(\.[a-z]{2})?$", re.IGNORECASE, ) _INTEL_SCHEMA = """ CREATE TABLE IF NOT EXISTS identities ( id INTEGER PRIMARY KEY AUTOINCREMENT, primary_id TEXT NOT NULL UNIQUE, emails TEXT DEFAULT '[]', usernames TEXT DEFAULT '[]', phones TEXT DEFAULT '[]', max_risk REAL DEFAULT 0.0, is_hvt INTEGER DEFAULT 0, pivot_count TEXT DEFAULT '{}', ts REAL DEFAULT (strftime('%s','now')) ); CREATE TABLE IF NOT EXISTS leaks ( id INTEGER PRIMARY KEY AUTOINCREMENT, identity_id INTEGER REFERENCES identities(id) ON DELETE CASCADE, source TEXT, email TEXT, username TEXT, password TEXT, password_hash TEXT, hash_type TEXT, phone TEXT, breach_name TEXT, breach_date TEXT, risk_score REAL DEFAULT 0, source_conf REAL DEFAULT 0.5, data_types TEXT DEFAULT '[]', is_hvt INTEGER DEFAULT 0, dedup_hash TEXT UNIQUE, ts REAL DEFAULT (strftime('%s','now')) ); CREATE TABLE IF NOT EXISTS correlation_links ( id INTEGER PRIMARY KEY AUTOINCREMENT, identity_id INTEGER REFERENCES identities(id) ON DELETE CASCADE, pivot_type TEXT, pivot_value TEXT, linked_ids TEXT DEFAULT '[]', ts REAL DEFAULT (strftime('%s','now')) ); CREATE TABLE IF NOT EXISTS query_cache ( id INTEGER PRIMARY KEY AUTOINCREMENT, query TEXT NOT NULL UNIQUE, qtype TEXT, scanned REAL DEFAULT (strftime('%s','now')) ); CREATE INDEX IF NOT EXISTS idx_leaks_email ON leaks(email); CREATE INDEX IF NOT EXISTS idx_leaks_identity ON leaks(identity_id); CREATE INDEX IF NOT EXISTS idx_leaks_risk ON leaks(risk_score DESC); CREATE INDEX IF NOT EXISTS idx_leaks_dedup ON leaks(dedup_hash); CREATE INDEX IF NOT EXISTS idx_ident_hvt ON identities(is_hvt); CREATE INDEX IF NOT EXISTS idx_cache_query ON query_cache(query); CREATE TABLE IF NOT EXISTS intel_records ( id INTEGER PRIMARY KEY AUTOINCREMENT, source TEXT, target TEXT, email TEXT, password TEXT, phone TEXT, address TEXT, full_name TEXT, fingerprint TEXT UNIQUE ); CREATE TABLE IF NOT EXISTS dork_results ( id INTEGER PRIMARY KEY AUTOINCREMENT, source_url TEXT UNIQUE, file_type TEXT, metadata_json TEXT, parent_target TEXT, ts REAL DEFAULT (strftime('%s','now')) ); """ def _parse_breach_date(raw: str) -> Optional[datetime]: if not raw: return None raw = raw.strip() for fmt in ("%Y-%m-%dT%H:%M:%S", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d"): try: return datetime.strptime(raw[:19], fmt).replace(tzinfo=timezone.utc) except ValueError: pass m = re.search(r"(\d{2})/(\d{2})/(\d{4})", raw) if m: # Try MM/DD/YYYY first, then DD/MM/YYYY (European format) for month, day in [(int(m.group(1)), int(m.group(2))), (int(m.group(2)), int(m.group(1)))]: try: return datetime(int(m.group(3)), month, day, tzinfo=timezone.utc) except ValueError: pass m = re.fullmatch(r"(\d{4})", raw) if m: return datetime(int(m.group(1)), 1, 1, tzinfo=timezone.utc) return None # ── Shared helpers ───────────────────────────────────────────────────── def _rec_get(r: Any, k: str) -> Any: return r.get(k, "") if isinstance(r, dict) else getattr(r, k, "") def _is_vip(r: Any) -> bool: ident = _rec_get(r, "email") or _rec_get(r, "username") return bool(_VIP_EMAIL_RE.search(ident) or _VIP_DOM_RE.search(ident)) def _is_stealer(r: Any) -> bool: dt = _rec_get(r, "data_types") or [] combined = ( (" ".join(dt) if isinstance(dt, list) else str(dt)).lower() + _rec_get(r, "source").lower() ) return any(t in combined for t in _STEALER_TAGS) # ── Record dataclass ─────────────────────────────────────────────────── @dataclass class Record: source: str email: str = "" username: str = "" password: str = "" password_hash: str = "" hash_type: str = "" ip_address: str = "" phone: str = "" name: str = "" domain: str = "" breach_date: str = "" breach_name: str = "" data_types: List[str] = field(default_factory=list) severity: Severity = Severity.MEDIUM raw_data: Dict = field(default_factory=dict) verified: bool = False timestamp: str = field(default_factory=lambda: datetime.now().isoformat()) risk_score: float = 0.0 source_confidence: float = 0.5 is_hvt: bool = False persistence_score: float = 0.0 address: str = "" full_name: str = "" metadata: Dict = field(default_factory=dict) def to_dict(self) -> Dict: d = asdict(self) d["severity"] = self.severity.name return d def dedup_key(self) -> str: """SHA-256 of normalised email:password for cross-source deduplication.""" em = (self.email or self.username or "").lower().strip() pw = (self.password or self.password_hash or "").strip() return hashlib.sha256(f"{em}:{pw}".encode()).hexdigest() def get_fingerprint(self) -> str: """Return a SHA-256 fingerprint for cross-source deduplication.""" data_str = f"{self.source}|{self.email}|{self.password}|{self.phone}|{self.address}" return hashlib.sha256(data_str.encode()).hexdigest() # ── Risk Engine ──────────────────────────────────────────────────────── class RiskEngine: """ Predictive risk scoring engine (0–100). Temporal Correlation & Exposure Scoring: - Persistence Score: multiplier when data appears across multiple distinct datasets in different years. - Exposure Recency: exponential multiplier for recent breaches. """ _DECAY_BOOST_DAYS = 365 _DECAY_MID_DAYS = 730 _DECAY_PENALTY_DAYS = 1825 @staticmethod def score(record: "Record") -> "Record": conf = _SRC_CONFIDENCE.get(record.source, 0.5) record.source_confidence = conf dtypes_str = " ".join(record.data_types).lower() if record.data_types else "" src_lower = record.source.lower() is_stealer = any(t in dtypes_str or t in src_lower for t in _STEALER_TAGS) if is_stealer and record.password: record.risk_score = 100.0 record.severity = Severity.CRITICAL return record pts = 0.0 if record.password: pts += 60 # Adjust base points by password complexity. # Weak passwords score lower; strong ones score higher. try: _pa_score = PassAnalyzer().analyze(record.password).get("score", 50) if _pa_score < 30: pts = max(0.0, pts - 15) elif _pa_score > 80: pts = min(100.0, pts + 10) except Exception: pass elif record.password_hash: ht = (record.hash_type or "").lower() pts += 30 if ht in _FAST_HASHES else 15 else: pts += 5 dt = _parse_breach_date(record.breach_date) if dt: age_days = (datetime.now(timezone.utc) - dt).days if age_days < RiskEngine._DECAY_BOOST_DAYS: # Exponential recency multiplier recency_factor = 1.0 + 0.5 * math.exp(-age_days / 180) pts = pts * recency_factor + 30 elif age_days < RiskEngine._DECAY_MID_DAYS: pts += 15 elif age_days > RiskEngine._DECAY_PENALTY_DAYS: pts = max(0.0, pts - 20) pts *= 0.5 + conf * 0.5 ident = record.email or record.username or "" local = ident.split("@")[0].lower() if "@" in ident else ident.lower() domain_part = ident.split("@")[1].lower() if "@" in ident else "" if ( any(kw in local for kw in _HVT_KEYWORDS) or (_HVT_DOMAINS.search(domain_part) if domain_part else False) or _VIP_EMAIL_RE.search(ident) or _VIP_DOM_RE.search(ident) ): pts = min(100.0, pts + 15) record.risk_score = round(min(pts, 100.0), 1) rs = record.risk_score if rs >= 90: record.severity = Severity.CRITICAL elif rs >= 70: record.severity = Severity.HIGH elif rs >= 40: record.severity = Severity.MEDIUM elif rs >= 10: record.severity = Severity.LOW else: record.severity = Severity.INFO return record @staticmethod def apply_persistence(records: List["Record"]) -> List["Record"]: """ Assign a Persistence Score when the same identity appears across multiple distinct breach datasets in different calendar years. """ identity_years: Dict[str, Set[int]] = {} identity_sources: Dict[str, Set[str]] = {} for r in records: ident = (r.email or r.username or "").lower() if not ident: continue identity_sources.setdefault(ident, set()).add(r.source) dt = _parse_breach_date(r.breach_date) if dt: identity_years.setdefault(ident, set()).add(dt.year) for r in records: ident = (r.email or r.username or "").lower() if not ident: continue years = identity_years.get(ident, set()) sources = identity_sources.get(ident, set()) if len(years) >= 2 and len(sources) >= 2: span = max(years) - min(years) if years else 0 r.persistence_score = round(min(100.0, len(sources) * 10 + span * 5), 1) r.risk_score = round(min(100.0, r.risk_score + r.persistence_score * 0.3), 1) return records # ── Identity Graphing & Correlation ─────────────────────────────────── @dataclass class TargetProfile: """Unified identity profile built by IdentityResolver.""" primary_id: str emails: List[str] = field(default_factory=list) usernames: List[str] = field(default_factory=list) phones: List[str] = field(default_factory=list) records: list = field(default_factory=list) pivot_count: Dict[str, int] = field(default_factory=dict) max_risk: float = 0.0 is_hvt: bool = False stuffing_risk: str = "LOW" def _add(self, rec: Any) -> None: self.records.append(rec) self.max_risk = max(self.max_risk, float(_rec_get(rec, "risk_score") or 0.0)) pw = _rec_get(rec, "password") usr = _rec_get(rec, "username") ph = _rec_get(rec, "phone") for val in filter(None, [ pw if pw and len(pw) > 6 else None, usr or None, ph or None, ]): self.pivot_count[val] = self.pivot_count.get(val, 0) + 1 em = _rec_get(rec, "email") if em and em not in self.emails: self.emails.append(em) if usr and usr not in self.usernames: self.usernames.append(usr) if ph and ph not in self.phones: self.phones.append(ph) ident = em or usr or "" if _VIP_EMAIL_RE.search(ident) or _VIP_DOM_RE.search(ident): self.is_hvt = True def _compute_stuffing_risk(self) -> None: max_reuse = max(self.pivot_count.values(), default=0) if max_reuse >= 5: self.stuffing_risk = "CRITICAL" elif max_reuse >= 3: self.stuffing_risk = "HIGH" elif max_reuse >= 2: self.stuffing_risk = "MEDIUM" else: self.stuffing_risk = "LOW" class IdentityResolver: """Links breach records into unified TargetProfile clusters via Union-Find.""" def __init__(self, records: list) -> None: self._records = records def resolve(self) -> List[TargetProfile]: parent: Dict[str, str] = {} pivot_map: Dict[str, str] = {} def _root(x: str) -> str: while parent.get(x, x) != x: parent[x] = parent.get(parent.get(x, x), x) x = parent.get(x, x) return x def _union(a: str, b: str) -> None: ra, rb = _root(a), _root(b) if ra != rb: parent[rb] = ra for rec in self._records: node = ( _rec_get(rec, "email") or _rec_get(rec, "username") or _rec_get(rec, "phone") or _rec_get(rec, "source") ) if not node: continue parent.setdefault(node, node) pw = _rec_get(rec, "password") for pv in filter(None, [ _rec_get(rec, "email") or None, _rec_get(rec, "username") or None, _rec_get(rec, "phone") or None, pw if pw and len(pw) > 6 else None, ]): if pv in pivot_map: _union(node, pivot_map[pv]) else: pivot_map[pv] = node clusters: Dict[str, TargetProfile] = {} for rec in self._records: node = ( _rec_get(rec, "email") or _rec_get(rec, "username") or _rec_get(rec, "phone") or _rec_get(rec, "source") ) if not node: continue root = _root(node) if root not in clusters: clusters[root] = TargetProfile(primary_id=root) clusters[root]._add(rec) for profile in clusters.values(): profile._compute_stuffing_risk() return sorted(clusters.values(), key=lambda p: -p.max_risk) # ── HVT Analyzer ────────────────────────────────────────────────────── class HVTAnalyzer: """High-Value Target & VIP detection module.""" @staticmethod def is_hvt(record: Any) -> bool: ident = _rec_get(record, "email") or _rec_get(record, "username") or "" local = ident.split("@")[0].lower() if "@" in ident else ident.lower() domain_part = ident.split("@")[1].lower() if "@" in ident else "" if any(kw in local for kw in _HVT_KEYWORDS): return True if domain_part and _HVT_DOMAINS.search(domain_part): return True if _VIP_EMAIL_RE.search(ident) or _VIP_DOM_RE.search(ident): return True return False @staticmethod def filter_hvt(records: list) -> list: hvt = [r for r in records if HVTAnalyzer.is_hvt(r)] return sorted(hvt, key=lambda r: _rec_get(r, "risk_score") or 0, reverse=True) @staticmethod def annotate(records: list) -> list: for rec in records: flag = HVTAnalyzer.is_hvt(rec) if isinstance(rec, dict): rec["is_hvt"] = flag else: rec.is_hvt = flag return records # ── Forensic Persistence Layer ───────────────────────────────────────── class DatabaseManager: """ Async aiosqlite persistence layer for CTI data with 24 h query cache and SHA-256 deduplication. Falls back to synchronous sqlite3 when aiosqlite is not installed. """ def __init__(self, path: Optional[str] = None) -> None: self.path = path or str(HOME_NOX / "nox_cache.db") self._use_async = aiosqlite is not None # Initialise schema synchronously so the constructor stays non-async. self._init_sync() # ── Schema bootstrap ────────────────────────────────────────────── def _init_sync(self) -> None: con = _sqlite3_fallback.connect(self.path, timeout=15) con.execute("PRAGMA journal_mode=WAL") # Run column migrations before applying full schema (handles existing DBs) _migrations = [ "ALTER TABLE leaks ADD COLUMN dedup_hash TEXT", "CREATE UNIQUE INDEX IF NOT EXISTS idx_leaks_dedup_unique ON leaks(dedup_hash) WHERE dedup_hash IS NOT NULL", ] for stmt in _migrations: try: con.execute(stmt) con.commit() except _sqlite3_fallback.OperationalError: pass # column already exists or table doesn't exist yet con.executescript(_INTEL_SCHEMA) con.commit() con.close() # ── Public async API ────────────────────────────────────────────── async def get_cached(self, query: str) -> Optional[List[dict]]: q_lower = query.lower() if self._use_async: async with aiosqlite.connect(self.path, timeout=15) as db: db.row_factory = aiosqlite.Row await db.execute("PRAGMA journal_mode=WAL") async with db.execute( "SELECT id, scanned FROM query_cache WHERE query=?", (q_lower,) ) as cur: row = await cur.fetchone() if not row: return None if datetime.now(timezone.utc).timestamp() - row["scanned"] > Cfg.CACHE_TTL: return None async with db.execute( "SELECT * FROM leaks WHERE email=? OR username=?", (q_lower, q_lower), ) as cur: rows = await cur.fetchall() return [dict(r) for r in rows] else: return self._get_cached_sync(q_lower) async def cache_records(self, query: str, qtype: str, records: list) -> None: if self._use_async: await self._cache_records_async(query, qtype, records) else: self._cache_records_sync(query, qtype, records) async def save_correlations(self, query: str, profiles: List[TargetProfile]) -> None: if self._use_async: await self._save_correlations_async(profiles) else: self._save_correlations_sync(profiles) async def save_record(self, r: "Record") -> None: if self._use_async: async with aiosqlite.connect(self.path, timeout=15) as db: await db.execute( "INSERT OR IGNORE INTO intel_records " "(source, target, email, password, phone, address, full_name, fingerprint) " "VALUES (?, ?, ?, ?, ?, ?, ?, ?)", (r.source, getattr(r, "target", ""), r.email, r.password, r.phone, r.address, r.full_name, r.get_fingerprint()), ) await db.commit() else: with _sqlite3_fallback.connect(self.path, timeout=15) as db: db.execute( "INSERT OR IGNORE INTO intel_records " "(source, target, email, password, phone, address, full_name, fingerprint) " "VALUES (?, ?, ?, ?, ?, ?, ?, ?)", (r.source, getattr(r, "target", ""), r.email, r.password, r.phone, r.address, r.full_name, r.get_fingerprint()), ) async def get_hvt_identities(self) -> List[dict]: if self._use_async: async with aiosqlite.connect(self.path, timeout=15) as db: db.row_factory = aiosqlite.Row await db.execute("PRAGMA journal_mode=WAL") async with db.execute( "SELECT * FROM identities WHERE is_hvt=1 ORDER BY max_risk DESC" ) as cur: rows = await cur.fetchall() return [dict(r) for r in rows] else: return self._get_hvt_sync() # ── Async implementations ───────────────────────────────────────── async def _cache_records_async(self, query: str, qtype: str, records: list) -> None: seen_hashes: Set[str] = set() async with aiosqlite.connect(self.path, timeout=15) as db: db.row_factory = aiosqlite.Row await db.execute("PRAGMA journal_mode=WAL") try: await db.execute( "INSERT OR REPLACE INTO query_cache (query, qtype) VALUES (?,?)", (query.lower(), qtype), ) for rec in records: dk = rec.dedup_key() if hasattr(rec, "dedup_key") else "" if dk and dk in seen_hashes: continue if dk: seen_hashes.add(dk) ident = rec.email or rec.username or rec.phone or query is_hvt = int(bool(_VIP_EMAIL_RE.search(ident) or _VIP_DOM_RE.search(ident))) await db.execute( "INSERT OR IGNORE INTO identities (primary_id, is_hvt) VALUES (?,?)", (ident, is_hvt), ) async with db.execute( "SELECT id FROM identities WHERE primary_id=?", (ident,) ) as cur: row = await cur.fetchone() if not row: continue iid = row["id"] await db.execute( """INSERT INTO leaks (identity_id, source, email, username, password, password_hash, hash_type, phone, breach_name, breach_date, risk_score, source_conf, data_types, is_hvt, dedup_hash) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""", ( iid, rec.source, rec.email, rec.username, rec.password, rec.password_hash, rec.hash_type, rec.phone, rec.breach_name, rec.breach_date, getattr(rec, "risk_score", 0.0), getattr(rec, "source_confidence", 0.5), json.dumps(rec.data_types), is_hvt, dk, ), ) await db.commit() except Exception as exc: logger.warning("DB store error: %s", exc) async def _save_correlations_async(self, profiles: List[TargetProfile]) -> None: async with aiosqlite.connect(self.path, timeout=15) as db: db.row_factory = aiosqlite.Row await db.execute("PRAGMA journal_mode=WAL") try: for profile in profiles: await db.execute( """UPDATE identities SET emails=?, usernames=?, phones=?, max_risk=?, is_hvt=?, pivot_count=? WHERE primary_id=?""", ( json.dumps(profile.emails), json.dumps(profile.usernames), json.dumps(profile.phones), profile.max_risk, int(profile.is_hvt), json.dumps(profile.pivot_count), profile.primary_id, ), ) async with db.execute( "SELECT id FROM identities WHERE primary_id=?", (profile.primary_id,) ) as cur: row = await cur.fetchone() if not row: continue iid = row["id"] for pivot_val, count in profile.pivot_count.items(): if count > 1: _ptype = Detect.qtype(pivot_val) if _ptype not in ("email", "username", "phone", "domain", "ip"): _ptype = "username" await db.execute( """INSERT INTO correlation_links (identity_id, pivot_type, pivot_value, linked_ids) VALUES (?,?,?,?)""", ( iid, _ptype, pivot_val[:64], json.dumps(profile.emails[:10]), ), ) await db.commit() except Exception as exc: logger.warning("DB correlation error: %s", exc) # ── Synchronous fallbacks (used when aiosqlite is absent) ───────── def _get_cached_sync(self, q_lower: str) -> Optional[List[dict]]: con = _sqlite3_fallback.connect(self.path, timeout=15) con.row_factory = _sqlite3_fallback.Row con.execute("PRAGMA journal_mode=WAL") try: row = con.execute( "SELECT id, scanned FROM query_cache WHERE query=?", (q_lower,) ).fetchone() if not row: return None if datetime.now(timezone.utc).timestamp() - row["scanned"] > Cfg.CACHE_TTL: return None return [ dict(r) for r in con.execute( "SELECT * FROM leaks WHERE email=? OR username=?", (q_lower, q_lower), ).fetchall() ] finally: con.close() def _cache_records_sync(self, query: str, qtype: str, records: list) -> None: con = _sqlite3_fallback.connect(self.path, timeout=15) con.row_factory = _sqlite3_fallback.Row con.execute("PRAGMA journal_mode=WAL") seen_hashes: Set[str] = set() try: con.execute( "INSERT OR REPLACE INTO query_cache (query, qtype) VALUES (?,?)", (query.lower(), qtype), ) for rec in records: dk = rec.dedup_key() if hasattr(rec, "dedup_key") else "" if dk and dk in seen_hashes: continue if dk: seen_hashes.add(dk) ident = rec.email or rec.username or rec.phone or query is_hvt = int(bool(_VIP_EMAIL_RE.search(ident) or _VIP_DOM_RE.search(ident))) con.execute( "INSERT OR IGNORE INTO identities (primary_id, is_hvt) VALUES (?,?)", (ident, is_hvt), ) row = con.execute( "SELECT id FROM identities WHERE primary_id=?", (ident,) ).fetchone() if not row: continue iid = row["id"] con.execute( """INSERT INTO leaks (identity_id, source, email, username, password, password_hash, hash_type, phone, breach_name, breach_date, risk_score, source_conf, data_types, is_hvt, dedup_hash) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""", ( iid, rec.source, rec.email, rec.username, rec.password, rec.password_hash, rec.hash_type, rec.phone, rec.breach_name, rec.breach_date, getattr(rec, "risk_score", 0.0), getattr(rec, "source_confidence", 0.5), json.dumps(rec.data_types), is_hvt, dk, ), ) con.commit() except _sqlite3_fallback.OperationalError as exc: logger.warning("DB store error: %s", exc) finally: con.close() def _save_correlations_sync(self, profiles: List[TargetProfile]) -> None: con = _sqlite3_fallback.connect(self.path, timeout=15) con.row_factory = _sqlite3_fallback.Row con.execute("PRAGMA journal_mode=WAL") try: for profile in profiles: con.execute( """UPDATE identities SET emails=?, usernames=?, phones=?, max_risk=?, is_hvt=?, pivot_count=? WHERE primary_id=?""", ( json.dumps(profile.emails), json.dumps(profile.usernames), json.dumps(profile.phones), profile.max_risk, int(profile.is_hvt), json.dumps(profile.pivot_count), profile.primary_id, ), ) row = con.execute( "SELECT id FROM identities WHERE primary_id=?", (profile.primary_id,) ).fetchone() if not row: continue iid = row["id"] for pivot_val, count in profile.pivot_count.items(): if count > 1: _ptype = Detect.qtype(pivot_val) if _ptype not in ("email", "username", "phone", "domain", "ip"): _ptype = "username" con.execute( """INSERT INTO correlation_links (identity_id, pivot_type, pivot_value, linked_ids) VALUES (?,?,?,?)""", ( iid, _ptype, pivot_val[:64], json.dumps(profile.emails[:10]), ), ) con.commit() except _sqlite3_fallback.OperationalError as exc: logger.warning("DB correlation error: %s", exc) finally: con.close() def _get_hvt_sync(self) -> List[dict]: con = _sqlite3_fallback.connect(self.path, timeout=15) con.row_factory = _sqlite3_fallback.Row con.execute("PRAGMA journal_mode=WAL") try: return [ dict(r) for r in con.execute( "SELECT * FROM identities WHERE is_hvt=1 ORDER BY max_risk DESC" ).fetchall() ] finally: con.close() # ── Legacy DB (backward-compatible) ─────────────────────────────────── class DB: """ Legacy synchronous DB facade. Internally uses aiosqlite when available, running coroutines via a dedicated background event loop so callers remain synchronous. Falls back to sqlite3 when aiosqlite is absent. """ def __init__(self, path=None): self.path = str(path or Cfg.DB) self._use_async = aiosqlite is not None if self._use_async: import threading as _threading self._loop = asyncio.new_event_loop() self._loop_thread = _threading.Thread( target=self._loop.run_forever, daemon=True, name="nox-db-loop" ) self._loop_thread.start() self._init() # ── Internal helpers ────────────────────────────────────────────── def _run(self, coro): """Submit a coroutine to the background loop and block until done.""" fut = asyncio.run_coroutine_threadsafe(coro, self._loop) return fut.result(timeout=60) async def _exec(self, sql: str, params: tuple = ()) -> None: async with aiosqlite.connect(self.path, timeout=15) as db: await db.execute("PRAGMA journal_mode=WAL") await db.execute(sql, params) await db.commit() async def _fetchone(self, sql: str, params: tuple = ()) -> Optional[dict]: async with aiosqlite.connect(self.path, timeout=15) as db: db.row_factory = aiosqlite.Row await db.execute("PRAGMA journal_mode=WAL") async with db.execute(sql, params) as cur: row = await cur.fetchone() return dict(row) if row else None async def _fetchall(self, sql: str, params: tuple = ()) -> List[dict]: async with aiosqlite.connect(self.path, timeout=15) as db: db.row_factory = aiosqlite.Row await db.execute("PRAGMA journal_mode=WAL") async with db.execute(sql, params) as cur: rows = await cur.fetchall() return [dict(r) for r in rows] async def _init_async(self) -> None: async with aiosqlite.connect(self.path, timeout=15) as db: await db.execute("PRAGMA journal_mode=WAL") await db.executescript(""" CREATE TABLE IF NOT EXISTS breach_cache ( id INTEGER PRIMARY KEY AUTOINCREMENT, query TEXT NOT NULL, source TEXT NOT NULL, data TEXT NOT NULL, ts REAL NOT NULL, ttl INTEGER DEFAULT 86400, UNIQUE(query, source)); CREATE TABLE IF NOT EXISTS credentials ( id INTEGER PRIMARY KEY AUTOINCREMENT, email TEXT, username TEXT, password TEXT, password_hash TEXT, hash_type TEXT, source TEXT, breach_name TEXT, breach_date TEXT, ts REAL DEFAULT (strftime('%s','now')), UNIQUE(email, password_hash, source)); CREATE TABLE IF NOT EXISTS hash_cache ( hash TEXT PRIMARY KEY, hash_type TEXT, plaintext TEXT, source TEXT, ts REAL DEFAULT (strftime('%s','now'))); CREATE TABLE IF NOT EXISTS api_keys ( service TEXT PRIMARY KEY, key TEXT NOT NULL, ts REAL DEFAULT (strftime('%s','now'))); CREATE TABLE IF NOT EXISTS scans ( id INTEGER PRIMARY KEY AUTOINCREMENT, query TEXT, qtype TEXT, results INTEGER, sources INTEGER, duration REAL, ts REAL DEFAULT (strftime('%s','now'))); CREATE TABLE IF NOT EXISTS dork_cache ( id INTEGER PRIMARY KEY AUTOINCREMENT, query TEXT, engine TEXT, dork TEXT, results TEXT, ts REAL DEFAULT (strftime('%s','now'))); CREATE TABLE IF NOT EXISTS paste_cache ( id INTEGER PRIMARY KEY AUTOINCREMENT, query TEXT, site TEXT, pid TEXT, content TEXT, ts REAL DEFAULT (strftime('%s','now')), UNIQUE(query, site, pid)); CREATE TABLE IF NOT EXISTS wordlists ( id INTEGER PRIMARY KEY AUTOINCREMENT, target TEXT, data TEXT, ts REAL DEFAULT (strftime('%s','now'))); CREATE TABLE IF NOT EXISTS config ( key TEXT PRIMARY KEY, value TEXT); CREATE INDEX IF NOT EXISTS idx_cred_email ON credentials(email); CREATE INDEX IF NOT EXISTS idx_cred_user ON credentials(username); CREATE INDEX IF NOT EXISTS idx_cred_hash ON credentials(password_hash); CREATE INDEX IF NOT EXISTS idx_cache_q ON breach_cache(query); """) await db.commit() # ── Sync fallback helpers ───────────────────────────────────────── @contextmanager def _conn(self): c = _sqlite3_fallback.connect(self.path, timeout=15) c.row_factory = _sqlite3_fallback.Row c.execute("PRAGMA journal_mode=WAL") try: yield c c.commit() finally: c.close() def _init_sync(self): with self._conn() as c: c.executescript(""" CREATE TABLE IF NOT EXISTS breach_cache ( id INTEGER PRIMARY KEY AUTOINCREMENT, query TEXT NOT NULL, source TEXT NOT NULL, data TEXT NOT NULL, ts REAL NOT NULL, ttl INTEGER DEFAULT 86400, UNIQUE(query, source)); CREATE TABLE IF NOT EXISTS credentials ( id INTEGER PRIMARY KEY AUTOINCREMENT, email TEXT, username TEXT, password TEXT, password_hash TEXT, hash_type TEXT, source TEXT, breach_name TEXT, breach_date TEXT, ts REAL DEFAULT (strftime('%s','now')), UNIQUE(email, password_hash, source)); CREATE TABLE IF NOT EXISTS hash_cache ( hash TEXT PRIMARY KEY, hash_type TEXT, plaintext TEXT, source TEXT, ts REAL DEFAULT (strftime('%s','now'))); CREATE TABLE IF NOT EXISTS api_keys ( service TEXT PRIMARY KEY, key TEXT NOT NULL, ts REAL DEFAULT (strftime('%s','now'))); CREATE TABLE IF NOT EXISTS scans ( id INTEGER PRIMARY KEY AUTOINCREMENT, query TEXT, qtype TEXT, results INTEGER, sources INTEGER, duration REAL, ts REAL DEFAULT (strftime('%s','now'))); CREATE TABLE IF NOT EXISTS dork_cache ( id INTEGER PRIMARY KEY AUTOINCREMENT, query TEXT, engine TEXT, dork TEXT, results TEXT, ts REAL DEFAULT (strftime('%s','now'))); CREATE TABLE IF NOT EXISTS paste_cache ( id INTEGER PRIMARY KEY AUTOINCREMENT, query TEXT, site TEXT, pid TEXT, content TEXT, ts REAL DEFAULT (strftime('%s','now')), UNIQUE(query, site, pid)); CREATE TABLE IF NOT EXISTS wordlists ( id INTEGER PRIMARY KEY AUTOINCREMENT, target TEXT, data TEXT, ts REAL DEFAULT (strftime('%s','now'))); CREATE TABLE IF NOT EXISTS config ( key TEXT PRIMARY KEY, value TEXT); CREATE INDEX IF NOT EXISTS idx_cred_email ON credentials(email); CREATE INDEX IF NOT EXISTS idx_cred_user ON credentials(username); CREATE INDEX IF NOT EXISTS idx_cred_hash ON credentials(password_hash); CREATE INDEX IF NOT EXISTS idx_cache_q ON breach_cache(query); """) # ── Schema init dispatcher ──────────────────────────────────────── def _init(self): if self._use_async: self._run(self._init_async()) else: self._init_sync() # ── Public API ──────────────────────────────────────────────────── def get_cache(self, q, src): if self._use_async: row = self._run(self._fetchone( "SELECT data,ts,ttl FROM breach_cache WHERE query=? AND source=?", (q.lower(), src), )) if row and (time.time() - row["ts"]) < row["ttl"]: return json.loads(row["data"]) return None with self._conn() as c: r = c.execute( "SELECT data,ts,ttl FROM breach_cache WHERE query=? AND source=?", (q.lower(), src), ).fetchone() if r and (time.time() - r["ts"]) < r["ttl"]: return json.loads(r["data"]) return None def set_cache(self, q, src, data, ttl=None): sql = "INSERT OR REPLACE INTO breach_cache (query,source,data,ts,ttl) VALUES (?,?,?,?,?)" params = (q.lower(), src, json.dumps(data, default=str), time.time(), ttl or Cfg.CACHE_TTL) if self._use_async: self._run(self._exec(sql, params)) else: with self._conn() as c: c.execute(sql, params) def store_cred(self, rec): # Use (email, password_hash, source) when hash is present; # fall back to (email, password, source) for cleartext-only records # so distinct cleartext passwords are never silently dropped. if rec.password_hash: sql = ("INSERT OR IGNORE INTO credentials " "(email,username,password,password_hash,hash_type,source,breach_name,breach_date) " "VALUES (?,?,?,?,?,?,?,?)") params = (rec.email, rec.username, rec.password, rec.password_hash, rec.hash_type, rec.source, rec.breach_name, rec.breach_date) else: sql = ("INSERT OR IGNORE INTO credentials " "(email,username,password,password_hash,hash_type,source,breach_name,breach_date) " "SELECT ?,?,?,?,?,?,?,? WHERE NOT EXISTS " "(SELECT 1 FROM credentials WHERE email=? AND password=? AND source=?)") params = (rec.email, rec.username, rec.password, rec.password_hash, rec.hash_type, rec.source, rec.breach_name, rec.breach_date, rec.email, rec.password, rec.source) if self._use_async: self._run(self._exec(sql, params)) else: with self._conn() as c: c.execute(sql, params) def get_key(self, svc): if self._use_async: row = self._run(self._fetchone( "SELECT key FROM api_keys WHERE service=?", (svc.lower(),) )) else: with self._conn() as c: r = c.execute("SELECT key FROM api_keys WHERE service=?", (svc.lower(),)).fetchone() row = dict(r) if r else None if row: return row["key"] svc_up = svc.upper().replace("-", "_") return ( os.environ.get(svc_up) or os.environ.get(f"{svc_up}_API_KEY") or os.environ.get(f"NOX_{svc_up}_KEY") or os.environ.get(f"NOX_{svc_up}_API_KEY") or "" ) def set_key(self, svc, key): sql = "INSERT OR REPLACE INTO api_keys (service, key) VALUES (?,?)" params = (svc.lower(), key) if self._use_async: self._run(self._exec(sql, params)) else: with self._conn() as c: c.execute(sql, params) def store_hash(self, h, ht, pt, src): sql = "INSERT OR REPLACE INTO hash_cache (hash,hash_type,plaintext,source) VALUES (?,?,?,?)" params = (h, ht, pt, src) if self._use_async: self._run(self._exec(sql, params)) else: with self._conn() as c: c.execute(sql, params) def get_plain(self, h): if self._use_async: row = self._run(self._fetchone( "SELECT plaintext FROM hash_cache WHERE hash=?", (h,) )) return row["plaintext"] if row else None with self._conn() as c: r = c.execute("SELECT plaintext FROM hash_cache WHERE hash=?", (h,)).fetchone() return r["plaintext"] if r else None def log_scan(self, q, qt, n, s, d): sql = "INSERT INTO scans (query,qtype,results,sources,duration) VALUES (?,?,?,?,?)" params = (q, qt, n, s, d) if self._use_async: self._run(self._exec(sql, params)) else: with self._conn() as c: c.execute(sql, params) def get_creds(self, q): sql = "SELECT * FROM credentials WHERE email=? OR username=? ORDER BY ts DESC" params = (q.lower(), q.lower()) if self._use_async: return self._run(self._fetchall(sql, params)) with self._conn() as c: return [dict(r) for r in c.execute(sql, params).fetchall()] def set_config(self, k, v): sql = "INSERT OR REPLACE INTO config (key, value) VALUES (?,?)" params = (k, v) if self._use_async: self._run(self._exec(sql, params)) else: with self._conn() as c: c.execute(sql, params) def get_config(self, k, default=""): if self._use_async: row = self._run(self._fetchone( "SELECT value FROM config WHERE key=?", (k,) )) return row["value"] if row else default with self._conn() as c: r = c.execute("SELECT value FROM config WHERE key=?", (k,)).fetchone() return r["value"] if r else default def close(self) -> None: """Stop the background event loop thread and release resources.""" if not (self._use_async and hasattr(self, "_loop")): return if self._loop.is_running(): self._loop.call_soon_threadsafe(self._loop.stop) if hasattr(self, "_loop_thread"): self._loop_thread.join(timeout=5) if not self._loop.is_closed(): self._loop.close() def __del__(self) -> None: try: self.close() except Exception: pass NoxDB = DB # ── Async TLS Context (JA3 fingerprint matching) ─────────────────────── def _build_ssl_context() -> ssl.SSLContext: """ Build an SSLContext that mirrors a modern Chrome/Firefox TLS handshake to prevent bot-detection false positives. """ ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) ctx.minimum_version = ssl.TLSVersion.TLSv1_2 ctx.set_ciphers(Cfg.TLS_CIPHERS) ctx.check_hostname = True ctx.verify_mode = ssl.CERT_REQUIRED return ctx _SSL_CTX = _build_ssl_context() # SearXNG public instance pool — used by DorkingEngine and ScrapeEngine. # Instances are rotated randomly; proxy rotation distributes load across IPs. _SEARX_INSTANCES = [ "https://searx.tiekoetter.com", "https://search.sapti.me", "https://searx.perennialte.ch", "https://search.mdosch.de", "https://paulgo.io", "https://priv.au", ] # ── Header randomisation helpers ────────────────────────────────────── _UA_POOL = [ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:136.0) Gecko/20100101 Firefox/136.0", "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:136.0) Gecko/20100101 Firefox/136.0", "Mozilla/5.0 (iPhone; CPU iPhone OS 18_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Mobile/15E148 Safari/604.1", "Mozilla/5.0 (Android 15; Mobile; rv:136.0) Gecko/136.0 Firefox/136.0", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Safari/605.1.15", ] _CH_UA_MAP = [ # Order matters: more specific patterns first ("Edg/135", '"Microsoft Edge";v="135", "Not-A.Brand";v="8", "Chromium";v="135"'), ("Chrome/135", '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"'), ("Chrome/134", '"Google Chrome";v="134", "Not-A.Brand";v="8", "Chromium";v="134"'), ] _ACCEPT_LANG_POOL = [ "en-US,en;q=0.9", "en-GB,en;q=0.9,en-US;q=0.8", "en-US,en;q=0.8,fr;q=0.5", "en-CA,en;q=0.9", "en-AU,en;q=0.9,en-US;q=0.8", ] _SEC_FETCH_DEST_POOL = ["document", "empty", "image", "script", "style"] _SEC_FETCH_MODE_POOL = ["navigate", "cors", "no-cors", "same-origin"] _SEC_FETCH_SITE_POOL = ["none", "same-origin", "cross-site", "same-site"] def _random_headers(extra: Optional[Dict] = None) -> Dict[str, str]: """Return a randomised, browser-grade header set with Client Hints for Chromium UAs.""" ua = random.choice(_UA_POOL) h = { "User-Agent": ua, "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "Accept-Language": random.choice(_ACCEPT_LANG_POOL), "Accept-Encoding": "gzip, deflate, br, zstd", "DNT": "1", "Connection": "keep-alive", "Upgrade-Insecure-Requests": "1", "Sec-Fetch-Dest": random.choice(_SEC_FETCH_DEST_POOL), "Sec-Fetch-Mode": random.choice(_SEC_FETCH_MODE_POOL), "Sec-Fetch-Site": random.choice(_SEC_FETCH_SITE_POOL), "Cache-Control": "max-age=0", } if extra: h.update(extra) # Derive the final UA after applying overrides so that a Firefox UA passed # via `extra` correctly suppresses Chromium-only Sec-CH-UA headers. final_ua = h["User-Agent"] if "Firefox" not in final_ua: ch_ua = next((v for k, v in _CH_UA_MAP if k in final_ua), None) if ch_ua: h["Sec-CH-UA"] = ch_ua h["Sec-CH-UA-Mobile"] = "?0" h["Sec-CH-UA-Platform"] = ( '"Windows"' if "Windows" in final_ua else '"macOS"' if "Mac" in final_ua else '"Linux"' ) return h async def _jitter(cfg: "NoxConfig") -> None: """Asynchronous jittered delay to respect server rate limits.""" if cfg.stealth: lo, hi = cfg.rate_limit await asyncio.sleep(random.uniform(lo, hi)) def _parse_retry_after(value: str, default: float) -> float: """Parse a Retry-After header value — handles both integer seconds and HTTP-date strings.""" try: return float(int(value)) except (ValueError, TypeError): pass try: from email.utils import parsedate_to_datetime delta = (parsedate_to_datetime(value) - datetime.now(timezone.utc)).total_seconds() return max(0.0, delta) except Exception: return default # ── Async Source Base ────────────────────────────────────────────────── class AsyncSource(ABC): """ Base class for all async breach sources. Subclasses implement `async_search` which is called by the Orchestrator through a shared asyncio.Semaphore. """ def __init__(self, semaphore, db: "DB", config: "NoxConfig") -> None: # Accept either a pre-built Semaphore or an int concurrency limit. # When an int is passed the semaphore is created lazily on first use # inside a running event loop (required on Python 3.10+). if isinstance(semaphore, asyncio.Semaphore): self._sem_obj: Optional[asyncio.Semaphore] = semaphore self._sem_limit: int = Cfg.CONCURRENCY # unused when _sem_obj is set else: self._sem_obj = None self._sem_limit = int(semaphore) if semaphore else Cfg.CONCURRENCY self._db = db self._config = config self.name = "Unknown" self.needs_key = False self.key_name = "" self.ok_email = True self.ok_user = True self.ok_phone = False self.ok_domain = False self.ok_ip = False self.ok_hash = False self.ok_pass = False self.ok_name = False self.ok_url = False @property def _sem(self) -> asyncio.Semaphore: """Return the semaphore, creating it lazily inside the running loop.""" if self._sem_obj is None: self._sem_obj = asyncio.Semaphore(self._sem_limit) return self._sem_obj def _key(self) -> str: if not self.key_name: return "" svc = self.key_name[:-8] if self.key_name.endswith("_api_key") else self.key_name return self._db.get_key(svc) def _ok(self, qt: str) -> bool: m = { "email": self.ok_email, "username": self.ok_user, "phone": self.ok_phone, "domain": self.ok_domain, "ip": self.ok_ip, "hash": self.ok_hash, "password": self.ok_pass, "name": self.ok_name, "url": self.ok_url, } return m.get(qt, False) def _rec(self, **kw) -> Record: kw.setdefault("source", self.name) sev = kw.pop("severity", Severity.MEDIUM) r = Record(**{k: v for k, v in kw.items() if k in Record.__dataclass_fields__}) r.severity = sev return r async def _get(self, session: "aiohttp.ClientSession", url: str, headers: Dict = None, timeout: int = None) -> Tuple[int, str, bytes]: """Perform a GET with jitter and retry logic.""" await _jitter(self._config) to = aiohttp_mod.ClientTimeout(total=timeout or self._config.timeout) if aiohttp_mod else None hdrs = _random_headers(headers) for attempt in range(Cfg.RETRIES): try: async with self._sem: async with session.get(url, headers=hdrs, timeout=to, ssl=_SSL_CTX) as resp: if resp.status == 429: retry_after = _parse_retry_after(resp.headers.get("Retry-After", ""), Cfg.RETRY_DELAY * (attempt + 2)) _syslog.info("RATE_LIMIT source=%s url=%s retry_after=%ds", self.name, url[:80], retry_after) await asyncio.sleep(min(retry_after, 30)) continue body = await resp.read() if resp.status >= 400: _syslog.warning("API_ERROR source=%s status=%d url=%s", self.name, resp.status, url[:80]) return resp.status, await resp.text(errors="replace"), body except Exception as exc: if attempt < Cfg.RETRIES - 1: await asyncio.sleep(Cfg.RETRY_DELAY * (2 ** attempt) + random.uniform(0, 1)) continue _syslog.debug("API_FAIL source=%s url=%s error=%s", self.name, url[:80], exc) return 0, "", b"" async def _post(self, session: "aiohttp.ClientSession", url: str, json_data: Dict = None, data: Dict = None, headers: Dict = None, timeout: int = None) -> Tuple[int, str, bytes]: """Perform a POST with jitter and retry logic.""" await _jitter(self._config) to = aiohttp_mod.ClientTimeout(total=timeout or self._config.timeout) if aiohttp_mod else None hdrs = _random_headers(headers) for attempt in range(Cfg.RETRIES): try: async with self._sem: if json_data is not None: hdrs["Content-Type"] = "application/json" async with session.post(url, json=json_data, headers=hdrs, timeout=to, ssl=_SSL_CTX) as resp: if resp.status == 429: retry_after = _parse_retry_after(resp.headers.get("Retry-After", ""), Cfg.RETRY_DELAY * (attempt + 2)) _syslog.info("RATE_LIMIT source=%s url=%s retry_after=%ds", self.name, url[:80], retry_after) await asyncio.sleep(min(retry_after, 30)) continue body = await resp.read() if resp.status >= 400: _syslog.warning("API_ERROR source=%s status=%d url=%s", self.name, resp.status, url[:80]) return resp.status, await resp.text(errors="replace"), body else: async with session.post(url, data=data or {}, headers=hdrs, timeout=to, ssl=_SSL_CTX) as resp: if resp.status == 429: retry_after = _parse_retry_after(resp.headers.get("Retry-After", ""), Cfg.RETRY_DELAY * (attempt + 2)) _syslog.info("RATE_LIMIT source=%s url=%s retry_after=%ds", self.name, url[:80], retry_after) await asyncio.sleep(min(retry_after, 30)) continue body = await resp.read() if resp.status >= 400: _syslog.warning("API_ERROR source=%s status=%d url=%s", self.name, resp.status, url[:80]) return resp.status, await resp.text(errors="replace"), body except Exception as exc: if attempt < Cfg.RETRIES - 1: await asyncio.sleep(Cfg.RETRY_DELAY * (2 ** attempt) + random.uniform(0, 1)) continue _syslog.debug("API_FAIL source=%s url=%s error=%s", self.name, url[:80], exc) return 0, "", b"" @abstractmethod async def async_search(self, session: "aiohttp.ClientSession", query: str, qtype: str) -> List[Record]: """Coroutine that returns a list of Records for the given query.""" def search(self, query: str, qtype: str) -> List[Record]: """Synchronous shim — runs the coroutine in a new event loop (fallback).""" try: loop = asyncio.get_running_loop() except RuntimeError: loop = None try: if loop and loop.is_running(): import concurrent.futures with concurrent.futures.ThreadPoolExecutor(max_workers=1) as ex: fut = ex.submit(asyncio.run, self._run_search(query, qtype)) return fut.result(timeout=self._config.timeout + 10) return asyncio.run(self._run_search(query, qtype)) except Exception: return [] async def _run_search(self, query: str, qtype: str) -> List[Record]: if not aiohttp_mod: return [] connector = aiohttp_mod.TCPConnector(ssl=_SSL_CTX, limit=10, family=0) # AF_UNSPEC async with aiohttp_mod.ClientSession(connector=connector) as session: return await self.async_search(session, query, qtype) # ── Legacy sync shim (keeps all existing Src subclasses working) ─────── class Src(AsyncSource): """ Backward-compatible base that wraps the original synchronous `search` pattern while exposing the new AsyncSource interface. """ def __init__(self, semaphore_or_session, db: "DB", config: "NoxConfig" = None) -> None: if isinstance(semaphore_or_session, asyncio.Semaphore): sem = semaphore_or_session self._legacy_session = None else: # Legacy: passed a Session object — use int limit so semaphore # is created lazily inside the event loop (Python 3.13 safe). sem = Cfg.CONCURRENCY self._legacy_session = semaphore_or_session super().__init__(sem, db, config or NoxConfig()) # Legacy attribute alias self.s = self._legacy_session async def async_search(self, session: "aiohttp.ClientSession", query: str, qtype: str) -> List[Record]: loop = asyncio.get_running_loop() return await loop.run_in_executor(None, self.search, query, qtype) @abstractmethod def search(self, query: str, qtype: str) -> List[Record]: pass # ── Input Detection ──────────────────────────────────────────────────── class Detect: @staticmethod def qtype(q: str) -> str: q = q.strip() if re.match(r"^[\w.+-]+@[\w-]+\.[\w.]+$", q): return "email" if re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", q) and all(0 <= int(o) <= 255 for o in q.split(".")): return "ip" if re.match(r"^(\+?\d{1,3}[\s.-]?)?$?\d{2,4}$?[\s.-]?\d{3,4}[\s.-]?\d{3,4}$", q): return "phone" if re.match(r"^[a-fA-F0-9]{32,128}$", q): return "hash" if re.match(r"^\$2[aby]?\$", q) or re.match(r"^\$argon2", q) or re.match(r"^\$[156]\$", q): return "hash" if re.match(r"^https?://", q): return "url" if re.match(r"^[a-zA-Z0-9]([a-zA-Z0-9-]*\.)+[a-zA-Z]{2,}$", q) and "." in q: return "domain" if len(q) <= 30 and re.match(r"^[\w.-]+$", q): return "username" if " " in q and len(q.split()) >= 2 and len(q) <= 60: return "name" return "username" # ── Legacy synchronous Session (kept for Src subclasses) ────────────── class Session: UA = _UA_POOL def __init__(self, config: NoxConfig) -> None: self.config = config self.use_tor = config.use_tor self.proxy = config.proxy self._lock = threading.Lock() self._n = 0 self._s = None self._cs = None if requests: self._s = requests.Session() self._s.verify = True if self.use_tor: self._s.proxies = { "http": f"socks5h://127.0.0.1:{config.tor_socks}", "https": f"socks5h://127.0.0.1:{config.tor_socks}", } if cloudscraper: try: self._cs = cloudscraper.create_scraper( browser={"browser": "chrome", "platform": "windows", "mobile": False} ) if self.use_tor: self._cs.proxies = { "http": f"socks5h://127.0.0.1:{config.tor_socks}", "https": f"socks5h://127.0.0.1:{config.tor_socks}", } except Exception: pass self._jar = http.cookiejar.CookieJar() self._opener = urllib.request.build_opener( urllib.request.HTTPCookieProcessor(self._jar), urllib.request.HTTPRedirectHandler(), ) def _hdrs(self, extra: Dict = None) -> Dict: return _random_headers(extra) def _rl(self) -> None: if self.config.stealth: time.sleep(random.uniform(*self.config.rate_limit)) with self._lock: self._n += 1 @staticmethod def _make_response(status: int, body: bytes, hdrs: dict, url: str): text = body.decode("utf-8", errors="replace") _body = body def _json(*_): return json.loads(_body.decode("utf-8", errors="replace")) ok = 200 <= status < 300 return type("R", (), { "status_code": status, "ok": ok, "text": text, "content": _body, "json": _json, "headers": hdrs, "url": url, })() @staticmethod def _null_response(url: str = ""): def _json(*_): return {} return type("R", (), { "status_code": 0, "ok": False, "text": "", "content": b"", "json": _json, "headers": {}, "url": url, })() def get(self, url: str, extra_headers: Dict = None, timeout: int = None, use_cloudscraper: bool = False): self._rl() to = timeout or self.config.timeout hdrs = self._hdrs(extra_headers) for attempt in range(Cfg.RETRIES): try: if use_cloudscraper and self._cs: r = self._cs.get(url, headers=hdrs, timeout=to) elif self._s: px = {"http": self.proxy, "https": self.proxy} if self.proxy else None r = self._s.get(url, headers=hdrs, timeout=to, proxies=px) else: req = urllib.request.Request(url, headers=hdrs) raw = self._opener.open(req, timeout=to) data = raw.read() if raw.headers.get("Content-Encoding") == "gzip": data = gzip.decompress(data) return self._make_response(raw.status, data, dict(raw.headers), raw.url) if getattr(r, "status_code", 0) == 429: retry_after = _parse_retry_after(r.headers.get("Retry-After", ""), Cfg.RETRY_DELAY * (attempt + 2)) time.sleep(min(retry_after, 30)) continue return r except Exception as e: if attempt < Cfg.RETRIES - 1: time.sleep(Cfg.RETRY_DELAY * (2 ** attempt) + random.uniform(0, 1)) continue logger.debug("GET fail %s: %s", url, e) return self._null_response(url) def post(self, url: str, data: Dict = None, json_data: Dict = None, extra_headers: Dict = None, timeout: int = None): self._rl() to = timeout or self.config.timeout hdrs = self._hdrs(extra_headers) for attempt in range(Cfg.RETRIES): try: if self._s: if json_data: hdrs["Content-Type"] = "application/json" r = self._s.post(url, json=json_data, headers=hdrs, timeout=to) else: r = self._s.post(url, data=data, headers=hdrs, timeout=to) if getattr(r, "status_code", 0) == 429: retry_after = _parse_retry_after(r.headers.get("Retry-After", ""), Cfg.RETRY_DELAY * (attempt + 2)) time.sleep(min(retry_after, 30)) continue return r body = json.dumps(json_data).encode() if json_data else urllib.parse.urlencode(data or {}).encode() hdrs["Content-Type"] = "application/json" if json_data else "application/x-www-form-urlencoded" req = urllib.request.Request(url, data=body, headers=hdrs, method="POST") raw = self._opener.open(req, timeout=to) rd = raw.read() if raw.headers.get("Content-Encoding") == "gzip": rd = gzip.decompress(rd) return self._make_response(raw.status, rd, dict(raw.headers), raw.url) except Exception as e: if attempt < Cfg.RETRIES - 1: time.sleep(Cfg.RETRY_DELAY * (2 ** attempt) + random.uniform(0, 1)) continue logger.debug("POST fail %s: %s", url, e) return self._null_response(url) def new_circuit(self) -> bool: if not stem: return False try: from stem import Signal from stem.control import Controller with Controller.from_port(port=self.config.tor_ctrl) as ctrl: ctrl.authenticate(password=self.config.tor_pass) ctrl.signal(Signal.NEWNYM) time.sleep(3) return True except Exception: return False # ======================================================================= # SOURCE REGISTRY # ======================================================================= class Registry: """All intelligence sources are loaded dynamically from sources/*.json by SourceOrchestrator.""" @classmethod def get(cls, session: "Session", db: "DB", qt: str = None) -> list: return [] @classmethod def count(cls) -> int: return 0 class _LegacySourcePlaceholder(Src): async def async_search(self, session, query, qtype): return [] def search(self, query, qtype): return [] # ======================================================================= # PROXY MANAGER — Guardian System # ======================================================================= class ProxyManager: """ Dynamic proxy engine ("Guardian System"). Priority: 1. proxies.txt in the working directory — loaded and validated. 2. Auto-fetch from ProxyScrape API if proxies.txt is missing. 3. Direct connection fallback if auto-fetch fails. Proxies are stored in memory and rotated per-request by consumers. Fail-Safe: when allow_leak=False (default) and a proxy/Tor was explicitly requested but no transport is available, execution is aborted to prevent real-IP exposure. """ _VALID_SCHEMES = ("http://", "https://", "socks5://", "socks4://") _cache: List[str] = [] @classmethod def reset(cls) -> None: """Clear the cached proxy pool so the next call to get_proxies() re-fetches.""" cls._cache = [] @classmethod def get_proxies(cls) -> List[str]: """Return a validated proxy list, fetching if necessary.""" if cls._cache: return list(cls._cache) proxy_file = Path("proxies.txt") if proxy_file.exists(): raw = [ l.strip() for l in proxy_file.read_text().splitlines() if l.strip() and any(l.strip().startswith(s) for s in cls._VALID_SCHEMES) ] if raw: cls._cache = raw out("info", f"[ProxyManager] Loaded {len(raw)} proxies from proxies.txt") return list(cls._cache) out("warn", "[ProxyManager] proxies.txt found but contains no valid entries — auto-fetching.") # Auto-fetch print( f"\n {C.BD}{C.Y}[!] OPSEC WARNING: Using public auto-fetched proxies. " f"For professional engagements, use Tor (--tor) or a private proxies.txt.{C.X}\n" ) fetched = cls._fetch_proxies() if fetched: cls._cache = fetched out("ok", f"[ProxyManager] Auto-fetched {len(fetched)} proxies.") return list(cls._cache) # Failover: direct connection print( f"\n {C.BD}{C.R}[!] WARNING: Proxy auto-fetch failed. " f"Falling back to DIRECT connection — your real IP may be exposed.{C.X}\n" ) cls._cache = [] return [] @classmethod def fail_safe_check(cls, config: "NoxConfig", allow_leak: bool = False) -> None: """ Fail-Safe Proxy enforcement. If the user explicitly requested a proxy or Tor but the transport is unavailable, abort execution immediately to prevent IP leakage. Pass allow_leak=True (--allow-leak flag) to bypass this check. """ proxy_requested = bool(config.proxy) or config.use_tor if not proxy_requested: return # Guardian Engine handles the no-proxy case separately transport_ready = False if config.use_tor: # Verify Tor SOCKS port is reachable import socket try: s = socket.create_connection(("127.0.0.1", config.tor_socks), timeout=3) s.close() transport_ready = True except OSError: transport_ready = False elif config.proxy: # Treat any non-empty proxy string as "configured" — aiohttp will # surface the error at request time; we just confirm it is set. transport_ready = True if not transport_ready: if allow_leak: print( f"\n {C.BD}{C.Y}[WARNING] OPSEC Alert: Proxy/Tor failed. " f"Continuing execution with REAL IP (--allow-leak active).{C.X}\n" ) return print( f"\n {C.BD}{C.R}[CRITICAL] OPSEC FAILURE: Requested Proxy/Tor is unavailable. " f"Execution aborted to prevent IP leak. Use --allow-leak to override.{C.X}\n" ) sys.exit(1) _PROXY_SOURCES = [ "https://api.proxyscrape.com/v3/free-proxy-list/get?request=displayproxies&protocol=http&timeout=5000&proxy_format=protocolipport&format=text", "https://raw.githubusercontent.com/proxifly/free-proxy-list/main/proxies/protocols/http/data.txt", "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt", ] @classmethod def _fetch_proxies(cls) -> List[str]: proxies: List[str] = [] for url in cls._PROXY_SOURCES: if proxies: break try: req = urllib.request.Request(url, headers={"User-Agent": "NOX Framework/ProxyManager"}) raw = urllib.request.urlopen(req, timeout=10) text = raw.read().decode("utf-8", errors="replace") for line in text.splitlines(): line = line.strip() if not line: continue if re.match(r"^\d{1,3}(\.\d{1,3}){3}:\d{2,5}$", line): proxies.append(f"http://{line}") elif any(line.startswith(s) for s in cls._VALID_SCHEMES): proxies.append(line) if proxies: logger.debug("ProxyManager: fetched %d proxies from %s", len(proxies), url) except Exception as exc: logger.debug("ProxyManager._fetch_proxies source=%s: %s", url, exc) continue return proxies[:200] @classmethod def validate_proxy(cls, proxy: str, timeout: int = 6) -> Optional[str]: """ Test a proxy by requesting https://api.ipify.org. Returns the observed exit IP on success, None on failure. SOCKS5 proxies are validated via requests+PySocks, not urllib. """ # urllib.ProxyHandler does not support SOCKS5 — use requests if available if proxy.startswith("socks5") or proxy.startswith("socks4"): try: import requests as _req # type: ignore resp = _req.get("https://api.ipify.org", proxies={"http": proxy, "https": proxy}, timeout=timeout) ip = resp.text.strip() if re.match(r"^\d{1,3}(\.\d{1,3}){3}$", ip): return ip except Exception: pass return None try: import urllib.request as _ur proxy_handler = _ur.ProxyHandler({"http": proxy, "https": proxy}) opener = _ur.build_opener(proxy_handler) resp = opener.open("https://api.ipify.org", timeout=timeout) ip = resp.read().decode().strip() if re.match(r"^\d{1,3}(\.\d{1,3}){3}$", ip): return ip except Exception: pass return None # ======================================================================= # DORKING ENGINE — passive document discovery + metadata extraction # ======================================================================= class _DorkTemplates: """Shared dork template lists — defined before DorkingEngine and DorkEngine to avoid forward-reference errors.""" NAME_DORKS = [ '"{q}" filetype:pdf', '"{q}" filetype:xlsx', '"{q}" filetype:csv', '"{q}" filetype:doc OR filetype:docx', '"{q}" filetype:txt', '"{q}" site:linkedin.com', '"{q}" site:facebook.com', '"{q}" site:twitter.com', '"{q}" site:instagram.com', '"{q}" site:github.com', '"{q}" site:pastebin.com', '"{q}" site:ghostbin.co', '"{q}" site:rentry.co', '"{q}" site:pastebin.com "password"', '"{q}" site:pastebin.com "email"', '"{q}" intext:"password"', '"{q}" intext:"email"', '"{q}" intext:"phone"', '"{q}" intext:"address"', '"{q}" intext:"credentials"', '"{q}" "database dump"', '"{q}" "INSERT INTO"', '"{q}" site:github.com "password"', '"{q}" site:gist.github.com', '"{q}" site:docs.google.com', '"{q}" site:trello.com', '"{q}" filetype:pdf site:gov', '"{q}" filetype:pdf site:edu', ] DOMAIN_DORKS = [ 'site:{q} filetype:sql', 'site:{q} filetype:env', 'site:{q} filetype:log', 'site:{q} inurl:admin', 'site:{q} inurl:login', 'site:{q} inurl:wp-config', 'site:{q} inurl:.git', 'site:{q} inurl:backup', 'site:{q} filetype:bak', 'site:{q} "index of" password', 'site:{q} inurl:config.php', 'site:{q} ext:conf OR ext:cnf OR ext:cfg', 'site:{q} "phpinfo()"', 'site:{q} filetype:xml intext:password', 'site:{q} filetype:json api_key OR secret', 'site:{q} intitle:"index of" .env', 'site:{q} ext:pem OR ext:key', 'site:{q} "PRIVATE KEY"', 'site:{q} filetype:xlsx', 'site:{q} filetype:csv', 'site:{q} intitle:"Dashboard" inurl:admin', 'site:{q} inurl:api password', 'site:{q} filetype:sql "INSERT INTO"', 'site:{q} filetype:log "password"', 'site:{q} filetype:env "DB_PASSWORD"', 'site:{q} filetype:yaml "password"', 'site:{q} inurl:phpinfo.php', 'site:{q} inurl:.git/config', 'site:{q} inurl:wp-config.php', 'site:{q} inurl:.env', 'site:{q} inurl:database.yml', 'site:{q} inurl:secrets.yml', 'site:{q} intitle:"index of" "backup"', 'site:{q} intitle:"index of" "dump"', 'site:{q} intitle:"index of" "sql"', 'site:{q} intitle:"index of" "database"', 'site:{q} intitle:"index of" ".env"', 'site:{q} intitle:"index of" "sql_dump"', 'site:{q} ext:sql "sql_dump"', 'site:{q} inurl:sql_dump', 'site:{q} intitle:"index of" "backup.sql"', 'site:{q} intitle:"index of" "dump.sql"', ] class DorkingEngine(Src): """Passive document discovery via Google/DDG dorks + PDF/Office metadata extraction.""" name = "DorkingEngine" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self._dead_proxies: set = set() self._proxy_index: int = 0 self.proxies = ProxyManager.get_proxies() self._dead_instances: set = set() def _get_next_proxy(self) -> Optional[str]: live = [p for p in self.proxies if p not in self._dead_proxies] if not live: return None self._proxy_index = (self._proxy_index + 1) % len(live) return live[self._proxy_index] _DOC_DORKS = [ '"{q}" filetype:pdf', '"{q}" filetype:xlsx', '"{q}" filetype:docx', '"{q}" filetype:pptx', '"{q}" filetype:log', '"{q}" site:pastebin.com', '"{q}" site:docs.google.com', '"{q}" site:drive.google.com', '"{q}" filetype:pdf site:gov', '"{q}" filetype:pdf site:edu', '"{q}" filetype:xlsx site:gov', ] _META_RE = { "author": re.compile(rb"/Author\s*$([^)]{1,120})$", re.I), "creator": re.compile(rb"/Creator\s*$([^)]{1,120})$", re.I), "software": re.compile(rb"/Producer\s*$([^)]{1,120})$", re.I), "local_paths": re.compile(rb"(?:[A-Za-z]:\\|/home/|/root/|/var/|/etc/)(?:[^\x00-\x1f\r\n]{1,200})", re.I), "emails": re.compile(rb"[\w.+-]{1,64}@[\w-]{1,63}\.[\w.]{2,10}", re.I), } def generate_queries(self, target: str, qtype: str = "email") -> List[str]: if qtype == "name": templates = _DorkTemplates.NAME_DORKS elif qtype == "domain": templates = _DorkTemplates.DOMAIN_DORKS else: templates = self._DOC_DORKS return [d.replace("{q}", target) for d in templates] @staticmethod async def extract_metadata(url: str, session) -> dict: meta = {"author": "", "creator": "", "software": "", "local_paths": [], "emails": []} try: async with session.get(url, timeout=aiohttp_mod.ClientTimeout(total=15), headers={"User-Agent": random.choice(_UA_POOL)}) as resp: if resp.status != 200: return meta chunk = await resp.content.read(131072) # 128 KB for key, pat in DorkingEngine._META_RE.items(): hits = pat.findall(chunk) if not hits: continue decoded = [h.decode("latin-1", errors="replace").strip() for h in hits] if key in ("local_paths", "emails"): meta[key] = list(dict.fromkeys(decoded))[:10] else: meta[key] = decoded[0] except Exception: pass return meta async def _ddg_search(self, query: str, _session=None) -> List[dict]: """DDG HTML is bot-blocked since 2025. Use SearXNG public JSON API.""" if not aiohttp_mod: return [] try: from aiohttp_socks import ProxyConnector as _ProxyConnector except ImportError: _ProxyConnector = None live_instances = [i for i in _SEARX_INSTANCES if i not in self._dead_instances] if not live_instances: self._dead_instances.clear() live_instances = list(_SEARX_INSTANCES) instance = random.choice(live_instances) url = f"{instance}/search?q={urllib.parse.quote(query)}&format=json&categories=general" proxy = self._get_next_proxy() try: if proxy and _ProxyConnector: connector = _ProxyConnector.from_url(proxy) else: connector = aiohttp_mod.TCPConnector(ssl=_SSL_CTX) async with aiohttp_mod.ClientSession(connector=connector) as sess: async with sess.get(url, headers=_random_headers(), timeout=aiohttp_mod.ClientTimeout(total=12)) as resp: if resp.status != 200: self._dead_instances.add(instance) if proxy: self._dead_proxies.add(proxy) return [] data = await resp.json(content_type=None) return [ {"url": r.get("url", ""), "title": r.get("title", ""), "dork": query} for r in data.get("results", [])[:5] if r.get("url") ] except Exception: self._dead_instances.add(instance) if proxy: self._dead_proxies.add(proxy) return [] async def async_search(self, session, query: str, qtype: str) -> List[Record]: if not aiohttp_mod: return [] dorks = self.generate_queries(query, qtype) seen_urls: Set[str] = set() async def _process_dork(dork: str) -> List[Tuple]: await asyncio.sleep(random.uniform(0.5, 2.0)) hits = await self._ddg_search(dork) rows = [] for hit in hits: url = hit.get("url", "") if not url or url in seen_urls: continue seen_urls.add(url) ext = url.lower().rsplit(".", 1)[-1].split("?")[0] if "." in url else "" meta = await DorkingEngine.extract_metadata(url, session) if ext in ("pdf", "xlsx", "docx", "pptx", "log") else {} rows.append((url, ext, meta, dork)) return rows all_rows = [] for batch in [dorks[i:i+5] for i in range(0, len(dorks), 5)]: results = await asyncio.gather(*[_process_dork(d) for d in batch], return_exceptions=True) for r in results: if isinstance(r, list): all_rows.extend(r) records = [ Record(source="DorkingEngine", email=query, raw_data={"url": url, "dork": dork}, metadata=meta) for url, ext, meta, dork in all_rows ] if all_rows and aiosqlite: try: async with aiosqlite.connect(self._db.path) as db: await db.executemany( "INSERT OR IGNORE INTO dork_results " "(source_url, file_type, metadata_json, parent_target) " "VALUES (?,?,?,?)", [(url, ext, json.dumps(meta), query) for url, ext, meta, _ in all_rows]) await db.commit() except Exception as exc: logger.debug("dork_results persist failed: %s", exc) return records def search(self, query: str, qtype: str) -> List[Record]: # sync fallback — not used when aiohttp is available return [] # ======================================================================= # DORK ENGINE # ======================================================================= class DorkEngine: # Delegate to _DorkTemplates to avoid duplication NAME_DORKS = _DorkTemplates.NAME_DORKS DOMAIN_DORKS = _DorkTemplates.DOMAIN_DORKS EMAIL_DORKS = [ '"{q}" filetype:sql password', '"{q}" filetype:env', '"{q}" filetype:log password', '"{q}" filetype:txt intext:password', '"{q}" filetype:csv email password', '"{q}" filetype:xlsx password', '"{q}" filetype:cfg password', '"{q}" filetype:conf password', '"{q}" filetype:bak password', '"{q}" filetype:json api_key', '"{q}" filetype:yaml password', '"{q}" site:pastebin.com', '"{q}" site:ghostbin.co', '"{q}" site:rentry.co', '"{q}" site:justpaste.it', '"{q}" site:dpaste.org', '"{q}" site:paste.ee', '"{q}" site:hastebin.com', '"{q}" site:privatebin.net', '"{q}" site:controlc.com', '"{q}" site:github.com password', '"{q}" site:gitlab.com password', '"{q}" site:docs.google.com', '"{q}" site:trello.com', '"{q}" site:mega.nz', '"{q}" intext:"password" intext:"username"', '"{q}" intext:"credentials" filetype:txt', '"{q}" filetype:env DB_PASSWORD', '"{q}" filetype:env "API_KEY"', '"{q}" ext:sql "INSERT INTO" -git', '"{q}" ext:json "password"', '"{q}" ext:yml "password"', '"{q}" ext:yaml "api_key"', '"{q}" intitle:"index of" "passwords.txt"', '"{q}" intitle:"index of" "credentials.txt"', '"{q}" inurl:passlist.txt', '"{q}" inurl:passwords.txt', '"{q}" inurl:credentials.txt', '"{q}" "database dump" filetype:sql', '"{q}" "INSERT INTO" "password"', '"{q}" site:pastebin.com "password"', '"{q}" site:pastebin.com "credentials"', '"{q}" site:github.com "password"', '"{q}" site:gist.github.com "password"', ] def __init__(self, session: "Session") -> None: self.s = session def run(self, q: str, qt: str, engines: List[str] = None) -> List[dict]: """ Parallelised dork runner. All (dork, engine) pairs are dispatched concurrently via a thread pool. Per-engine jitter is applied inside _search so the sleep is not sequential. Total wall-clock time ≈ max(single_request_time) instead of O(n_dorks × sleep). """ if engines is None: engines = ["google", "bing", "ddg"] dorks = self.EMAIL_DORKS if qt == "email" else self.DOMAIN_DORKS if qt == "domain" else self.NAME_DORKS if qt == "name" else self.EMAIL_DORKS[:20] dorks = dorks[:Cfg.DORK_MAX] from concurrent.futures import ThreadPoolExecutor, as_completed as _as_completed def _run_one(dork: str) -> List[dict]: query = dork.replace("{q}", q) time.sleep(random.uniform(*Cfg.DORK_DELAY)) hits = self._search(query, "SearXNG") for h in hits: h["dork"] = query h["engine"] = "SearXNG" return hits results = [] max_workers = min(len(dorks), 12) with ThreadPoolExecutor(max_workers=max_workers) as pool: futures = {pool.submit(_run_one, d): d for d in dorks} for fut in _as_completed(futures): try: results.extend(fut.result()) except Exception: pass seen = set() unique = [] for r in results: key = r.get("url", r.get("title", "")) if key not in seen: seen.add(key) unique.append(r) return unique def _search(self, query: str, engine: str) -> List[dict]: hits = [] try: # Direct Google/Bing HTML scraping is blocked by CAPTCHA/consent walls # since 2024. Route all engines through SearXNG JSON API. url = f"{random.choice(_SEARX_INSTANCES)}/search?q={urllib.parse.quote(query)}&format=json&categories=general" resp = self.s.get(url, timeout=15, use_cloudscraper=False) if not resp.ok: return hits data = resp.json() for r in data.get("results", [])[:10]: if r.get("url"): hits.append({ "title": r.get("title", ""), "url": r["url"], "snippet": r.get("content", ""), }) except Exception: pass return hits # ======================================================================= # SCRAPE ENGINE — Telegram indexer + advanced dorks + regex extraction # ======================================================================= class ScrapeEngine: PASTE_SITES = [ # Paste intelligence is routed through SearXNG dorks and IntelX. ("IntelX", "https://2.intelx.io/intelligent/search", "intelx"), ] CRED_RE = re.compile(r"[\w.+-]+@[\w-]+\.[\w.-]+\s*[:;|]\s*\S+", re.IGNORECASE) EMAIL_RE = re.compile(r"[\w.+-]+@[\w-]+\.[\w.]+") HASH_RE = re.compile(r"\b[a-f0-9]{32,128}\b", re.IGNORECASE) COMBO_RE = re.compile(r"^[^:]+:[^:]+$", re.MULTILINE) PATTERNS = [ (re.compile(r"(?:password|passwd|pass|pwd)\s*[:=]\s*\S+", re.I), "Password"), (re.compile(r"(?:api[_-]?(?:key|secret)|access_token|auth_token)\s*[:=]\s*['\"]?[A-Za-z0-9_\-]{16,}", re.I), "API Key/Token"), (re.compile(r"AKIA[0-9A-Z]{16}"), "AWS Access Key"), (re.compile(r"(?:aws_secret|secret_access_key)\s*[:=]\s*[A-Za-z0-9/+=]{40}", re.I), "AWS Secret Key"), (re.compile(r"-----BEGIN (?:RSA|EC|OPENSSH )?PRIVATE KEY-----"), "Private Key"), (re.compile(r"(?:mysql|postgres|mongodb|redis|mssql)://[^\s\"'<>]{8,}", re.I), "DB Connection"), (re.compile(r"eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}"), "JWT Token"), (re.compile(r"xox[baprs]-[0-9A-Za-z-]+"), "Slack Token"), (re.compile(r"https://hooks\.slack\.com/services/T[A-Z0-9]+/B[A-Z0-9]+/[A-Za-z0-9]+"), "Slack Webhook"), (re.compile(r"gh[pousr]_[A-Za-z0-9]{36}"), "GitHub Token"), (re.compile(r"glpat-[A-Za-z0-9_-]{20,}"), "GitLab Token"), (re.compile(r"ya29\.[A-Za-z0-9_-]+"), "Google OAuth"), (re.compile(r"AIza[0-9A-Za-z_-]{35}"), "Google API Key"), (re.compile(r"sk_live_[0-9a-zA-Z]{24}"), "Stripe Live Key"), (re.compile(r"sk_test_[0-9a-zA-Z]{24}"), "Stripe Test Key"), (re.compile(r"rk_live_[0-9a-zA-Z]{24}"), "Stripe Restricted Key"), (re.compile(r"[MN][A-Za-z\d]{23}\.[\w-]{6}\.[\w-]{27}"), "Discord Token"), (re.compile(r"\d{8,10}:[A-Za-z0-9_-]{35,40}"), "Telegram Bot Token"), (re.compile(r"EAACEdEose0cBA[0-9A-Za-z]+"), "Facebook Token"), (re.compile(r"\b[a-f0-9]{32}\b", re.I), "MD5 Hash"), (re.compile(r"\b[a-f0-9]{40}\b", re.I), "SHA1 Hash"), (re.compile(r"\b[a-f0-9]{64}\b", re.I), "SHA256 Hash"), (re.compile(r"\$2[aby]\$\d{2}\$[./A-Za-z0-9]{53}"), "Bcrypt Hash"), (re.compile(r"[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}"), "Email"), ] TELEGRAM_CTI_CHANNELS = [ "leakbase", "breachforums", "darkleaks", "combolist", "databreach", "leakednews", "cybercrime", "hackersnews", "threatintel", "darkweb", ] def __init__(self, session: "Session", db: "DB") -> None: self.s = session self.db = db def run(self, q: str, qt: str) -> dict: results = {"pastes": [], "credentials": [], "hashes": [], "telegram": [], "dork_misconfigs": []} # Phase 1: Paste sites import xml.etree.ElementTree as ET for name, url, fmt in self.PASTE_SITES: try: if fmt == "json": resp = self.s.get(url.replace("{q}", urllib.parse.quote(q)), timeout=12) if resp.ok: data = resp.json() if isinstance(resp.json(), list) else resp.json().get("data",[]) for p in (data or [])[:Cfg.PASTE_MAX]: pid = p.get("id","") if isinstance(p,dict) else str(p) results["pastes"].append({"site":name,"id":pid,"data":p}) elif fmt == "xml": resp = self.s.get(url.replace("{q}", urllib.parse.quote(q)), timeout=12) if resp.ok: root = ET.fromstring(resp.text) for item in root.findall(".//item")[:Cfg.PASTE_MAX]: pid = item.findtext("key") or item.findtext("id") or "" results["pastes"].append({"site":name,"id":pid,"data":item}) elif fmt == "intelx": key = Vault.get("INTELX_API_KEY") or self.db.get_key("intelx_api_key") if key: resp = self.s.post(url, json_data={"term":q,"maxresults":Cfg.PASTE_MAX,"media":0,"target":0}, extra_headers={"x-key":key}, timeout=15) if resp.ok: sid = resp.json().get("id") if sid: # Exponential backoff poll _delay = 2 for _attempt in range(4): time.sleep(_delay) res = self.s.get(f"https://2.intelx.io/intelligent/search/result?id={sid}", extra_headers={"x-key":key}, timeout=15) if res.ok: records_data = res.json().get("records", []) if records_data: for r in records_data[:Cfg.PASTE_MAX]: results["pastes"].append({"site":"IntelX","id":r.get("systemid",""),"data":r}) break _delay = min(_delay * 2, 16) # cap at 16s except Exception: continue # Phase 2: Extract credentials from paste content for paste in results["pastes"][:Cfg.PASTE_MAX]: try: content = self._fetch_content(paste) if content: for c in self.CRED_RE.findall(content)[:50]: results["credentials"].append({"raw":c,"source":paste.get("site",""),"paste_id":paste.get("id","")}) for h in self.HASH_RE.findall(content)[:20]: results["hashes"].append({"hash":h,"source":paste.get("site",""),"paste_id":paste.get("id","")}) for combo in self.COMBO_RE.findall(content)[:50]: if ":" in combo: email, pw = combo.split(":",1) if "@" in email and len(pw) > 0: results["credentials"].append({"raw":combo,"source":paste.get("site",""),"paste_id":paste.get("id","")}) found_patterns: Dict[str, List] = {} for pat, label in self.PATTERNS: matches = pat.findall(content) if matches: found_patterns[label] = matches[:10] if found_patterns: paste["patterns"] = found_patterns except Exception: continue # Phase 3: Public Telegram Indexer results["telegram"] = self._telegram_index(q, qt) # Phase 4: Advanced misconfiguration search results["dork_misconfigs"] = self._dork_misconfigs(q, qt) # Phase 5: DDG search for leaked data _ddg_queries = { "name": [f'"{q}" password leak', f'"{q}" database dump', f'"{q}" site:pastebin.com', f'"{q}" credentials'], "email": [f'"{q}" password leak', f'"{q}" database dump'], "domain": [f'site:{q} password', f'"{q}" database dump'], } for sq in _ddg_queries.get(qt, [f'"{q}" password leak', f'"{q}" database dump']): try: resp = self.s.get(f"{random.choice(_SEARX_INSTANCES)}/search?q={urllib.parse.quote(sq)}&format=json&categories=general", timeout=10) if resp.ok: try: data = resp.json() for r in data.get("results", [])[:5]: if r.get("title"): results["pastes"].append({"site": "SearXNG", "title": r["title"], "url": r.get("url", ""), "query": sq}) except Exception: pass except Exception: continue return results def _telegram_index(self, q: str, qt: str) -> List[dict]: """ Parse public Telegram web-gateway previews to index public CTI telemetry and threat actor communications. """ hits = [] targets = [q] if qt in ("username", "domain", "name") else [] targets += self.TELEGRAM_CTI_CHANNELS for channel in targets: try: resp = self.s.get(f"https://t.me/s/{urllib.parse.quote(channel)}", timeout=10, use_cloudscraper=True) if not resp.ok or not BeautifulSoup: continue soup = BeautifulSoup(resp.text, "html.parser") msgs = soup.select(".tgme_widget_message_text") for msg in msgs[:20]: text = msg.get_text(separator=" ").strip() if not text: continue # Check if query appears in message if q.lower() in text.lower() or qt == "username": found_patterns: Dict[str, List] = {} for pat, label in self.PATTERNS: matches = pat.findall(text) if matches: found_patterns[label] = matches[:5] hits.append({ "channel": channel, "text": text[:500], "patterns": found_patterns, "contains_target": q.lower() in text.lower(), }) except Exception: continue return hits def _dork_misconfigs(self, q: str, qt: str) -> List[dict]: """ Automate search queries for exposed public misconfigurations (index of, .env, sql_dump files) associated with the target domain. """ hits = [] if qt not in ("domain", "email", "name"): return hits if qt == "name": dorks = [ f'"{q}" filetype:pdf', f'"{q}" filetype:xlsx', f'"{q}" site:pastebin.com', f'"{q}" intext:"password"', f'"{q}" "database dump"', f'"{q}" site:github.com', ] else: target = q if qt == "domain" else q.split("@")[1] if "@" in q else q dorks = [ f'site:{target} intitle:"index of"', f'site:{target} intitle:"index of" ".env"', f'site:{target} intitle:"index of" "sql_dump"', f'site:{target} intitle:"index of" "backup"', f'site:{target} ext:env', f'site:{target} ext:sql', f'"{target}" filetype:env', f'"{target}" filetype:sql "sql_dump"', ] for dork in dorks: try: resp = self.s.get(f"{random.choice(_SEARX_INSTANCES)}/search?q={urllib.parse.quote(dork)}&format=json&categories=general", timeout=10) if resp.ok: try: data = resp.json() for r in data.get("results", [])[:5]: if r.get("title"): hits.append({ "dork": dork, "title": r["title"], "url": r.get("url", ""), }) except Exception: pass time.sleep(random.uniform(2.0, 4.0)) except Exception: continue return hits def _fetch_content(self, paste: dict) -> str: try: site = paste.get("site","") pid = paste.get("id","") data = paste.get("data",{}) if not pid: return "" raw_urls: dict = {} # paste fetch URLs — resolved per site name if site == "IntelX": key = self.db.get_key("intelx") if key: resp = self.s.get(f"https://2.intelx.io/file/read?type=1&systemid={pid}&k={key}", timeout=15) if resp.ok: return resp.text[:10000] elif site in raw_urls: resp = self.s.get(raw_urls[site], timeout=10) if resp.ok and resp.text: return resp.text[:10000] if isinstance(data, dict): for k in ("content","text","body","raw","paste"): if data.get(k): return str(data[k])[:10000] except Exception: pass return "" @staticmethod async def extract_patterns(text: str) -> dict: patterns = { "phones": r'\+[1-9]\d{1,14}\b', "addresses": r'\d+\s+[A-Za-z0-9\s]+(?:Street|St|Avenue|Ave|Road|Rd|Via|Piazza|Corso|Largo)\W+[A-Za-z\s]+', "handles": r'@[A-Za-z0-9_]+', } await asyncio.sleep(0) return {key: re.findall(pattern, text) for key, pattern in patterns.items()} # ======================================================================= # HASH ENGINE # ======================================================================= class HashEngine: TYPES = [ ("MD5", re.compile(r"^[a-f0-9]{32}$", re.I), "md5"), ("SHA1", re.compile(r"^[a-f0-9]{40}$", re.I), "sha1"), ("SHA224", re.compile(r"^[a-f0-9]{56}$", re.I), "sha224"), ("SHA256", re.compile(r"^[a-f0-9]{64}$", re.I), "sha256"), ("SHA384", re.compile(r"^[a-f0-9]{96}$", re.I), "sha384"), ("SHA512", re.compile(r"^[a-f0-9]{128}$", re.I), "sha512"), ("NTLM", re.compile(r"^[a-f0-9]{32}$", re.I), "ntlm"), ("MySQL", re.compile(r"^\*[A-F0-9]{40}$"), "mysql"), ("bcrypt", re.compile(r"^\$2[aby]?\$\d{2}\$"), "bcrypt"), ("Argon2", re.compile(r"^\$argon2"), "argon2"), ("SHA512Crypt", re.compile(r"^\$6\$"), "sha512crypt"), ("SHA256Crypt", re.compile(r"^\$5\$"), "sha256crypt"), ("MD5Crypt", re.compile(r"^\$1\$"), "md5crypt"), ("WordPress", re.compile(r"^\$P\$"), "wordpress"), ("phpBB", re.compile(r"^\$H\$"), "phpbb"), ("Drupal", re.compile(r"^\$S\$"), "drupal"), ("Django-SHA256",re.compile(r"^pbkdf2_sha256\$"), "django"), ("LM", re.compile(r"^[a-f0-9]{32}$", re.I), "lm"), ("CRC32", re.compile(r"^[a-f0-9]{8}$", re.I), "crc32"), ] COMMON_PASS = [ "password","123456","12345678","qwerty","abc123","monkey","1234567","letmein", "trustno1","dragon","baseball","iloveyou","master","sunshine","ashley","bailey", "shadow","123123","654321","superman","qazwsx","michael","football","password1", "password123","admin","admin123","root","toor","test","guest","welcome","login", "pass","pass123","1234","12345","123456789","1234567890","0987654321","111111", "666666","888888","000000","P@ssw0rd","P@ss1234","Welcome1","Ch@ngeme","Qwerty123", "Summer2024","Winter2025","Spring2024","Fall2024","Password123!","Admin@123", "Root@123","Qwerty@123","1qaz2wsx","1qaz@WSX","q1w2e3r4","Password1!", "Admin123!","Welcome@2025","Changeme123","P@ssword2025","Secure@123", ] LEET_MAP = {"a":"@4","e":"3","i":"1!","o":"0","s":"$5","t":"7","l":"1","g":"9","b":"8"} def __init__(self, db: "DB", session: "Session" = None) -> None: self.db = db self._session = session def identify(self, h: str) -> List[Tuple[str, str]]: types = [(name, tag) for name, pat, tag in self.TYPES if pat.match(h)] # For 32-char hex, MD5/NTLM/LM all match the same pattern. # Return only MD5 (most common in breach data) to avoid wasting # crack cycles on tags that have no hashlib implementation. if len(types) > 1: seen_tags: set = set() deduped = [] for name, tag in types: if tag not in seen_tags: seen_tags.add(tag) deduped.append((name, tag)) # If the set contains md5/ntlm/lm ambiguity, keep only md5 tags = {t for _, t in deduped} if "md5" in tags and ("ntlm" in tags or "lm" in tags): deduped = [(n, t) for n, t in deduped if t not in ("ntlm", "lm")] types = deduped return types if types else [("Unknown", "unknown")] def crack(self, h: str) -> dict: cached = self.db.get_plain(h) if cached: return {"hash":h,"plaintext":cached,"method":"Cache","types":self.identify(h)} types = self.identify(h) result = {"hash":h,"plaintext":None,"method":None,"types":types} for fn, method in [(self._dict_attack,"Dictionary+Mutations"),(self._online,"Online Rainbow"),(self._hashmob,"Hashmob Community"),(self._extended,"Extended Mutations")]: plain = fn(h) if fn != self._dict_attack else fn(h, types) if plain: result["plaintext"] = plain result["method"] = method self._cache(h, plain, method) return result return result def _dict_attack(self, h: str, types: list) -> Optional[str]: h_low = h.lower() for pw in self.COMMON_PASS: for mutation in self._mutate(pw): for _, tag in types: try: if tag == "md5" and hashlib.md5(mutation.encode()).hexdigest() == h_low: return mutation if tag == "sha1" and hashlib.sha1(mutation.encode()).hexdigest() == h_low: return mutation if tag == "sha256" and hashlib.sha256(mutation.encode()).hexdigest() == h_low: return mutation if tag == "sha512" and hashlib.sha512(mutation.encode()).hexdigest() == h_low: return mutation except Exception: continue return None def _mutate(self, word: str) -> List[str]: mutations = [word, word.upper(), word.lower(), word.capitalize(), word+"!", word+"1", word+"123", word+"@", word+"#", word+"2024", word+"2025", word[::-1], word+word, word.capitalize()+"!", word.capitalize()+"1", word+"!@#", word+"123!", word+"123@", word+"123#"] leet = word.lower() for c, replacements in self.LEET_MAP.items(): for r in replacements: mutations.append(leet.replace(c, r, 1)) return list(set(mutations)) def _online(self, h: str) -> Optional[str]: try: from sources.helpers.config_handler import ConfigManager # type: ignore key = ConfigManager.get_key("HASHES_COM_API_KEY") if not key: return None apis = [(f"https://hashes.com/en/api/search?hash={h}&key={key}", "json")] except Exception: return None _get = self._session.get if self._session else (lambda url, **kw: Session._null_response(url)) for url, fmt in apis: try: resp = _get(url, timeout=8) if not resp.ok: continue data = resp.json() if data.get("result") or data.get("plaintext"): return data.get("result", data.get("plaintext", "")) except Exception: continue return None def _hashmob(self, h: str) -> Optional[str]: try: if not self._session: return None resp = self._session.post("https://hashmob.net/api/v2/search", json_data={"hashes": [h]}, timeout=10) if resp.ok: data = resp.json() results = data.get("data") or [] if isinstance(results, list) and results: return results[0].get("plaintext") or results[0].get("result") or None except Exception: pass return None def _extended(self, h: str) -> Optional[str]: extra = ["password!","admin!","root123","test1234","welcome1","changeme","P@ssword1","Passw0rd!","S3cure!","l3tm3in","p4ssw0rd","Summer2024","Winter2025"] h_low = h.lower() types = self.identify(h) for pw in extra: for mutation in self._mutate(pw): for _, tag in types: try: if tag == "md5" and hashlib.md5(mutation.encode()).hexdigest() == h_low: return mutation if tag == "sha1" and hashlib.sha1(mutation.encode()).hexdigest() == h_low: return mutation if tag == "sha256" and hashlib.sha256(mutation.encode()).hexdigest() == h_low: return mutation except Exception: continue return None def _cache(self, h: str, p: str, m: str) -> None: try: self.db.store_hash(h, "", p, m) except Exception: pass # ======================================================================= # PASSWORD ANALYZER # ======================================================================= class PassAnalyzer: KEYBOARD_WALKS = ["qwerty","qwertz","azerty","asdf","zxcv","qwer","1234","4321","1qaz","2wsx","3edc","4rfv","5tgb","6yhn","7ujm","qazwsx","zxcvbn","poiuyt","1qaz2wsx","q1w2e3r4","qwertyuiop","asdfghjkl","zxcvbnm"] DATE_PATS = [re.compile(r"\d{4}[-/]\d{2}[-/]\d{2}"), re.compile(r"\d{2}[-/]\d{2}[-/]\d{4}"), re.compile(r"(?:19|20)\d{2}"), re.compile(r"\d{8}")] LEET_REV = {"@":"a","4":"a","3":"e","1":"il","!":"i","0":"o","$":"s","5":"s","7":"t","9":"g","8":"b"} _COMMON_FALLBACK = {"password","123456","12345678","qwerty","abc123","monkey","1234567","letmein","trustno1","dragon","baseball","iloveyou","master","sunshine","ashley","bailey","shadow","123123","654321","superman","qazwsx","michael","football","password1","admin","root","welcome","login","test","guest","pass","qwertyuiop","qwerty123","passw0rd","P@ssw0rd","admin123","root123","welcome1","login123","test123","guest123","password123"} @classmethod def _load_common(cls) -> set: """Load wordlist from ~/.nox/wordlists/ if available, else use fallback set.""" for name in ("10k-most-common.txt", "common-passwords.txt", "rockyou-top1000.txt"): p = Cfg.WORDLISTS / name if p.exists(): try: words = {l.strip().lower() for l in p.read_text(errors="ignore").splitlines() if l.strip()} if words: return words except Exception: pass return cls._COMMON_FALLBACK @classmethod def _get_common(cls) -> set: if not hasattr(cls, "_common_cache"): cls._common_cache = cls._load_common() return cls._common_cache def analyze(self, password: str) -> dict: length = len(password) charsets = 0; charset_names = [] if re.search(r"[a-z]", password): charsets += 26; charset_names.append("lowercase") if re.search(r"[A-Z]", password): charsets += 26; charset_names.append("uppercase") if re.search(r"[0-9]", password): charsets += 10; charset_names.append("digits") if re.search(r"[^a-zA-Z0-9]", password): charsets += 33; charset_names.append("symbols") entropy = length * math.log2(max(charsets, 1)) if charsets else 0 patterns = []; penalties = 0 if password.lower() in self._get_common(): patterns.append("Common password (top 10K)"); penalties += 40 for walk in self.KEYBOARD_WALKS: if walk in password.lower(): patterns.append(f"Keyboard walk: {walk}"); penalties += 15; break for pat in self.DATE_PATS: if pat.search(password): patterns.append("Date pattern detected"); penalties += 10; break if re.search(r"(.)\1{2,}", password): patterns.append("Repeated characters"); penalties += 10 deleet = password for leet, orig in self.LEET_REV.items(): deleet = deleet.replace(leet, orig[0]) if deleet.lower() != password.lower() and deleet.lower() in self._get_common(): patterns.append(f"Leet speak of common password: {deleet.lower()}"); penalties += 30 raw_score = min(100, int(entropy * 1.5)) final_score = max(0, raw_score - penalties) speeds = [("Online (10/s)",10),("Throttled (1K/s)",1000),("Offline fast (1B/s)",1_000_000_000),("GPU cluster (100B/s)",100_000_000_000)] crack_times = {} for label, speed in speeds: # Use logarithms to avoid OverflowError on very long passwords if charsets <= 1 or length == 0: secs = 0.0 else: log_secs = length * math.log10(max(charsets, 1)) - math.log10(speed) secs = 0.0 if log_secs < 0 else (float('inf') if log_secs > 300 else 10 ** log_secs) if secs == 0.0 or secs < 1: crack_times[label] = "Instant" elif math.isinf(secs): crack_times[label] = "> 10^300 years" elif secs < 60: crack_times[label] = f"{secs:.0f} seconds" elif secs < 3600: crack_times[label] = f"{secs/60:.0f} minutes" elif secs < 86400: crack_times[label] = f"{secs/3600:.0f} hours" elif secs < 86400*365: crack_times[label] = f"{secs/86400:.0f} days" elif secs < 86400*365*1000: crack_times[label] = f"{secs/(86400*365):.0f} years" else: crack_times[label] = f"{secs/(86400*365):.2e} years" if final_score >= 80: strength = "VERY STRONG" elif final_score >= 60: strength = "STRONG" elif final_score >= 40: strength = "MODERATE" elif final_score >= 20: strength = "WEAK" else: strength = "VERY WEAK" return {"password":password,"length":length,"entropy":round(entropy,2),"charsets":charset_names,"charset_size":charsets,"patterns":patterns,"penalties":penalties,"score":final_score,"raw_score":raw_score,"strength":strength,"crack_times":crack_times} # ======================================================================= # CREDENTIAL ANALYZER — Temporal Correlation & Deduplication # ======================================================================= class CredAnalyzer: @staticmethod def analyze(records: list) -> dict: if not records: return {} emails: Dict[str,int] = {}; passwords: Dict[str,int] = {}; domains: Dict[str,int] = {} timeline = []; stealer_logs = [] total_crit = total_high = total_med = 0 dedup_seen: Set[str] = set() unique_records = [] for r in records: dk = r.dedup_key() if hasattr(r, "dedup_key") else "" if dk and dk in dedup_seen: continue if dk: dedup_seen.add(dk) unique_records.append(r) em = _rec_get(r, "email") pw = _rec_get(r, "password") dom = _rec_get(r, "domain") sev = _rec_get(r, "severity") or Severity.INFO if em: emails[em] = emails.get(em, 0) + 1 if pw: passwords[pw] = passwords.get(pw, 0) + 1 if dom: domains[dom] = domains.get(dom, 0) + 1 bd = _rec_get(r, "breach_date") if bd: timeline.append({"date":bd,"breach":_rec_get(r,"breach_name"),"severity":sev.name if isinstance(sev,Severity) else str(sev)}) if any(x in str(_rec_get(r,"data_types") or []).lower() for x in ["stealer","redline","raccoon","vidar","infostealer"]): stealer_logs.append(r) sev_name = sev.name if isinstance(sev, Severity) else str(sev).upper() if sev_name == "CRITICAL": total_crit += 1 elif sev_name == "HIGH": total_high += 1 elif sev_name == "MEDIUM": total_med += 1 reused = {pw: cnt for pw, cnt in passwords.items() if cnt > 1} score = min(100, total_crit*25 + total_high*10 + total_med*3 + len(stealer_logs)*20 + len(reused)*15) timeline.sort(key=lambda x: x.get("date","")) persistence_scores = [getattr(r,"persistence_score",0.0) for r in unique_records if getattr(r,"persistence_score",0.0) > 0] avg_persistence = round(sum(persistence_scores)/len(persistence_scores),1) if persistence_scores else 0.0 return { "total_records": len(records), "unique_records": len(unique_records), "unique_emails": len(emails), "top_emails": sorted(emails.items(), key=lambda x: -x[1])[:10], "unique_passwords": len(passwords), "passwords_found": len(passwords), "reused_passwords": reused, "unique_domains": len(domains), "top_domains": sorted(domains.items(), key=lambda x: -x[1])[:10], "stealer_logs": len(stealer_logs), "hvt_count": sum(1 for r in unique_records if getattr(r, "is_hvt", False) or (isinstance(r, dict) and r.get("is_hvt"))), "severity": {"critical":total_crit,"high":total_high,"medium":total_med}, "risk_score": score, "timeline": timeline[:20], "avg_persistence": avg_persistence, } # ======================================================================= # PIVOT MANAGER — Recursive Data Enrichment Engine # ======================================================================= class PivotManager: """ Builds identity graphs by automatically triggering sub-queries on high-confidence pivot candidates (usernames, secondary emails, phones) up to a configurable depth, with a strict seen-targets set to prevent infinite loops. """ def __init__(self, orchestrator: "Orchestrator", max_depth: int = None) -> None: self._orc = orchestrator self._max_depth = max_depth or Cfg.PIVOT_DEPTH self._seen: Set[str] = set() def enrich(self, seed_records: List[Record], seed_target: str) -> List[Record]: """ Given an initial set of records, extract pivot candidates and recursively scan them, returning all discovered records. """ self._seen.add(seed_target.lower()) all_records = list(seed_records) self._pivot(seed_records, depth=1, all_records=all_records) return all_records def _pivot(self, records: List[Record], depth: int, all_records: List[Record]) -> None: if depth > self._max_depth: return # Only pivot on records with sufficient source confidence confident = [r for r in records if getattr(r, "source_confidence", 1.0) >= Cfg.PIVOT_CONFIDENCE] candidates = self._extract_candidates(confident or records) for candidate, qtype in candidates: key = candidate.lower() if key in self._seen: continue self._seen.add(key) out("pivot", f" [Depth {depth}] Pivoting on {qtype}: {candidate}") try: new_records = self._orc.scan(candidate, qtype) if new_records: all_records.extend(new_records) self._pivot(new_records, depth + 1, all_records) except Exception as exc: logger.debug("Pivot error %s: %s", candidate, exc) @staticmethod def _extract_candidates(records: List[Record]) -> List[Tuple[str, str]]: candidates: List[Tuple[str, str]] = [] seen_vals: Set[str] = set() for r in records: for val, qtype in [ (_rec_get(r, "email"), "email"), (_rec_get(r, "username"), "username"), (_rec_get(r, "phone"), "phone"), (_rec_get(r, "full_name"), "name"), (_rec_get(r, "name"), "name"), ]: if val and val.lower() not in seen_vals and len(val) > 3: seen_vals.add(val.lower()) candidates.append((val, qtype)) return candidates[:30] # ======================================================================= # ASYNC ORCHESTRATOR — Full asyncio event loop # ======================================================================= class Orchestrator: def __init__(self, config: NoxConfig = None, db: NoxDB = None) -> None: self.config = config or NoxConfig() self.db = db or NoxDB() self.session = Session(self.config) self.hash_engine = HashEngine(self.db, self.session) self.pass_analyzer = PassAnalyzer() self.dork_engine = DorkEngine(self.session) self.scrape_engine = ScrapeEngine(self.session, self.db) self.intel_db = DatabaseManager() self.dorking_engine = DorkingEngine(self.config.concurrency, self.db, self.config) self._json_sources: List["JSONSourceLoader"] = [] self._source_orchestrator: Optional["SourceOrchestrator"] = None def _get_semaphore(self) -> asyncio.Semaphore: # Always create a fresh semaphore bound to the current running loop. return asyncio.Semaphore(self.config.concurrency) # ── Async core scan ─────────────────────────────────────────────── async def _async_scan(self, target: str, query_type: str) -> List[Record]: """ Run all source queries as non-blocking coroutines managed by a global asyncio.Semaphore. """ # ── Fail-Safe Proxy check (transport-level, before any connection) ── ProxyManager.fail_safe_check(self.config, allow_leak=self.config.allow_leak) # SourceOrchestrator is created once and reused across calls. The semaphore # is rebound on each invocation so concurrency limits are always respected. if self._source_orchestrator is None: self._source_orchestrator = SourceOrchestrator( self._get_semaphore(), self.db, self.config ) self._source_orchestrator._ensure_loaded() else: # Rebind semaphore AND propagate to all loaded source instances new_sem = self._get_semaphore() self._source_orchestrator._sem = new_sem for src in (self._source_orchestrator._nox_sources + self._source_orchestrator._fs_providers + self._source_orchestrator._py_providers): src._sem_obj = new_sem sources = self._source_orchestrator.get_sources(self.session, query_type) out("info", f"Active sources: {len(sources)} / {self._source_orchestrator.plugin_count()} (filtered for input type: {query_type})") if not aiohttp_mod: # Fallback: synchronous thread pool from concurrent.futures import ThreadPoolExecutor, as_completed records = [] with ThreadPoolExecutor(max_workers=self.config.concurrency) as executor: futures = {executor.submit(src.search, target, query_type): src for src in sources} for i, future in enumerate(as_completed(futures), 1): src = futures[future] try: recs = future.result(timeout=self.config.timeout + 5) if recs: records.extend(recs) out("ok", f" [{i}/{len(sources)}] {src.name}: {len(recs)} results") else: out("dim", f" [{i}/{len(sources)}] {src.name}: 0 results") except Exception as exc: out("dim", f" [{i}/{len(sources)}] {src.name}: error - {str(exc)[:50]}") return records connector = aiohttp_mod.TCPConnector(ssl=_SSL_CTX, limit=self.config.concurrency, family=0) # family=0 → AF_UNSPEC (IPv4+IPv6) # SOCKS5 proxies require ProxyConnector — aiohttp trust_env does not support SOCKS5. _socks5_connector = False if self.config.proxy and self.config.proxy.startswith("socks5"): try: from aiohttp_socks import ProxyConnector as _ProxyConnector # type: ignore connector = _ProxyConnector.from_url(self.config.proxy, ssl=_SSL_CTX, limit=self.config.concurrency) _socks5_connector = True except ImportError: logger.warning("aiohttp_socks not installed — SOCKS5 proxy bypassed. Install: pip install aiohttp-socks") # Set proxy environment variables for HTTP/S proxies so aiohttp trust_env picks them up. # A module-level lock prevents concurrent scans from racing on the shared env vars. _proxy_env_set = False if self.config.proxy and not _socks5_connector and not os.environ.get("HTTPS_PROXY"): with _PROXY_ENV_LOCK: if not os.environ.get("HTTPS_PROXY"): os.environ["HTTPS_PROXY"] = self.config.proxy os.environ["HTTP_PROXY"] = self.config.proxy _proxy_env_set = True session_kwargs: dict = {"trust_env": True} if (self.config.proxy and not _socks5_connector) else {} # Per-source semaphores — fresh each call, bound to the current running loop. _source_sems: Dict[str, asyncio.Semaphore] = {} try: async with aiohttp_mod.ClientSession(connector=connector, **session_kwargs) as session: _counter = [0] # Breach sources only — DorkingEngine is dispatched separately in fullscan/autoscan. tasks = [ asyncio.create_task(self._run_source(session, src, target, query_type, _counter, len(sources), _source_sems)) for src in sources ] results = await asyncio.gather(*tasks, return_exceptions=True) finally: if _proxy_env_set: os.environ.pop("HTTPS_PROXY", None) os.environ.pop("HTTP_PROXY", None) records = [] for r in results: if isinstance(r, list): records.extend(r) return records async def _run_source(self, session, src, target: str, qtype: str, counter: list, total: int, source_sems: dict = None) -> List[Record]: # Per-source semaphore: max 3 concurrent requests per source if source_sems is None: source_sems = {} src_name = getattr(src, "name", "unknown") if src_name not in source_sems: source_sems[src_name] = asyncio.Semaphore(3) try: async with source_sems[src_name]: recs = await src.async_search(session, target, qtype) counter[0] += 1 idx = counter[0] if recs: out("ok", f" [{idx}/{total}] {src.name}: {len(recs)} results") else: out("dim", f" [{idx}/{total}] {src.name}: 0 results") return recs or [] except Exception as exc: counter[0] += 1 idx = counter[0] out("dim", f" [{idx}/{total}] {src.name}: error - {str(exc)[:50]}") return [] # ── Public scan API ─────────────────────────────────────────────── def scan(self, target: str, query_type: str = None) -> List[Record]: if not query_type: query_type = Detect.qtype(target) out("info", f"Scanning: {target} (type: {query_type})") try: loop = asyncio.get_running_loop() except RuntimeError: loop = None try: if loop and loop.is_running(): import concurrent.futures with concurrent.futures.ThreadPoolExecutor(max_workers=1) as ex: records = ex.submit( asyncio.run, self._full_async_scan(target, query_type) ).result(timeout=300) else: records = asyncio.run(self._full_async_scan(target, query_type)) except Exception: records = [] return records async def _full_async_scan(self, target: str, query_type: str) -> List[Record]: """Async pipeline: cache-check → network scan → score → persist → dehash → reputation.""" # Cache check try: cached = await self.intel_db.get_cached(target) if cached: out("ok", f"Cache hit: {len(cached)} records (< 24 h old)") return self._hydrate_cache(cached) except Exception as exc: logger.debug("Cache check failed: %s", exc) records = await self._async_scan(target, query_type) out("ok", f"\nScan complete: {len(records)} records") records = [RiskEngine.score(r) for r in records] records = RiskEngine.apply_persistence(records) HVTAnalyzer.annotate(records) # Vault AutoDehash hook — run in executor to avoid blocking the event loop loop = asyncio.get_running_loop() records = await loop.run_in_executor(None, Vault.autodehash, records, self.db) # DeHash & Reputation enrichment — run concurrently (best-effort, non-blocking) if aiohttp_mod: connector = aiohttp_mod.TCPConnector(ssl=_SSL_CTX, limit=5) async with aiohttp_mod.ClientSession(connector=connector) as enrich_session: dehash_eng = DeHashEngine(self.db, self.config) rep_eng = ReputationEngine(self.config) _dehash_res, rep_result = await asyncio.gather( dehash_eng.dehash_records(enrich_session, records), rep_eng.check(enrich_session, target, query_type), return_exceptions=True, ) if isinstance(_dehash_res, list): records = _dehash_res if isinstance(rep_result, dict) and rep_result: out("info", f"VirusTotal: {rep_result['malicious']} malicious, " f"{rep_result['suspicious']} suspicious detections for {target}") try: await self.intel_db.cache_records(target, query_type, records) except Exception as exc: logger.debug("DB persist failed: %s", exc) return records async def fullscan(self, target: str, pivot: bool = True): """Full autoscan: Recursive Avalanche Engine — breach + dork + scrape on every discovered asset.""" out("info", f"[*] Avalanche scan starting: {target}") _t0 = time.time() if _HAS_AVALANCHE and pivot: engine = AvalancheScanner(self) all_records, dork_results, scrape_results = await engine.run(target) pivot_chain = [target] + [a for a in engine.seen_assets if a != target.lower()] pivot_depth = engine.get_max_depth() pivot_log = engine.pivot_log discovered_assets = engine.get_discovered_assets() else: all_records = await self._full_async_scan(target, Detect.qtype(target)) loop = asyncio.get_running_loop() dork_results, scrape_results = await asyncio.gather( self.async_dork(target), loop.run_in_executor(None, self.scrape, target), return_exceptions=True, ) if isinstance(dork_results, Exception): dork_results = [] if isinstance(scrape_results, Exception): scrape_results = {} pivot_chain = [target] pivot_depth = 0 pivot_log = [] discovered_assets = [] # ── Enrich scraped results into records ─────────────────────── for cred in scrape_results.get("credentials", []): raw = cred.get("raw", "") if ":" in raw: parts = raw.split(":", 1) em, pw = parts[0].strip(), parts[1].strip() r = Record(source=cred.get("source", "ScrapeEngine"), email=em if "@" in em else "", username=em if "@" not in em else "", password=pw, breach_name=cred.get("paste_id", ""), data_types=["Scraped", "Credentials"]) else: r = Record(source=cred.get("source", "ScrapeEngine"), raw_data=cred, breach_name=cred.get("paste_id", ""), data_types=["Scraped"]) r = RiskEngine.score(r) all_records.append(r) for paste in scrape_results.get("pastes", []): r = Record(source=paste.get("source", "PasteScraper"), breach_name=paste.get("id", ""), raw_data=paste, data_types=["Paste"]) r = RiskEngine.score(r) all_records.append(r) for tg in scrape_results.get("telegram", []): r = Record(source=f"Telegram/{tg.get('channel', 'unknown')}", raw_data=tg, data_types=["Telegram"]) r = RiskEngine.score(r) all_records.append(r) for mc in scrape_results.get("dork_misconfigs", []): r = Record(source="MisconfigScraper", domain=mc.get("url", ""), raw_data=mc, data_types=["Misconfiguration"]) r = RiskEngine.score(r) all_records.append(r) analysis = CredAnalyzer.analyze(all_records) HVTAnalyzer.annotate(all_records) # set is_hvt field on every record hvt_records = HVTAnalyzer.filter_hvt(all_records) return { "target": target, "records": all_records, "analysis": analysis, "hvt_records": hvt_records, "dork_results": dork_results, "scrape_results": scrape_results, "pivot_chain": pivot_chain, "pivot_log": pivot_log, "discovered_assets": discovered_assets, "scan_meta": { "elapsed_seconds": round(time.time() - _t0, 1), "pivot_depth": pivot_depth, "nodes_discovered": len({ v.lower() for r in all_records for v in [ _rec_get(r, "email"), _rec_get(r, "username"), _rec_get(r, "ip_address"), _rec_get(r, "phone"), _rec_get(r, "domain"), ] if v }), }, } def crack(self, hash_value: str) -> dict: return self.hash_engine.crack(hash_value) def analyze_pass(self, password: str) -> dict: return self.pass_analyzer.analyze(password) def dork(self, target: str, query_type: str = None) -> List[dict]: if not query_type: query_type = Detect.qtype(target) return self.dork_engine.run(target, query_type) async def async_dork(self, target: str, session=None) -> List[dict]: """Native async dork dispatch via DorkingEngine.""" try: import aiohttp as _aio # type: ignore if session is None: connector = _aio.TCPConnector(limit=10, ssl=_SSL_CTX, family=0) async with _aio.ClientSession(connector=connector) as _s: records = await self.dorking_engine.async_search(_s, target, Detect.qtype(target)) else: records = await self.dorking_engine.async_search(session, target, Detect.qtype(target)) return [ { "url": r.raw_data.get("url", "") if hasattr(r, "raw_data") else "", "title": r.raw_data.get("url", r.raw_data.get("dork", "")) if hasattr(r, "raw_data") else "", "snippet": "", "dork": r.raw_data.get("dork", "") if hasattr(r, "raw_data") else "", "engine": "DDG", } for r in records ] except Exception as exc: logger.debug("async_dork %s: %s", target, exc) return [] def scrape(self, target: str, query_type: str = None) -> dict: if not query_type: query_type = Detect.qtype(target) return self.scrape_engine.run(target, query_type) @staticmethod def _hydrate_cache(cached: List[dict]) -> List[Record]: records = [] for d in cached: try: dt = d.get("data_types","[]") if isinstance(dt, str): try: dt = json.loads(dt) except Exception: dt = [] rs = float(d.get("risk_score", 0.0)) if rs >= 90: sev = Severity.CRITICAL elif rs >= 70: sev = Severity.HIGH elif rs >= 40: sev = Severity.MEDIUM elif rs >= 10: sev = Severity.LOW else: sev = Severity.INFO records.append(Record( source=d.get("source",""), email=d.get("email",""), username=d.get("username",""), password=d.get("password",""), password_hash=d.get("password_hash",""), hash_type=d.get("hash_type",""), phone=d.get("phone",""), breach_name=d.get("breach_name",""), breach_date=d.get("breach_date",""), data_types=dt, severity=sev, risk_score=rs, source_confidence=float(d.get("source_conf",0.5)), is_hvt=bool(d.get("is_hvt",0)), )) except Exception: continue return records # ======================================================================= # ADVANCED REPORTER # ======================================================================= class AdvancedReporter: # Control characters and binary garbage that break PDF/terminal rendering _CTRL_RE = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x9f]") @staticmethod def sanitize_payload(value: Any) -> str: """ Central sanitization for all user-supplied / breach-sourced strings. 1. Coerce to str. 2. Strip control characters and binary garbage (safe for PDF/terminal). 3. HTML-escape the result (safe for HTML embedding — prevents XSS). Example: '' → '<script>alert(1)</script>' """ s = str(value) if value is not None else "" s = AdvancedReporter._CTRL_RE.sub("", s) return html_module.escape(s) @staticmethod def _raw(value: Any) -> str: """Strip control chars only — no HTML escaping (for PDF/CSV/plain-text paths).""" s = str(value) if value is not None else "" return AdvancedReporter._CTRL_RE.sub("", s) @staticmethod def _build_summary(records: list) -> dict: identities: Set[str] = set(); hvt_list = []; stealers = 0 buckets = {"Critical":0,"High":0,"Medium":0,"Low":0,"Info":0} pw_patterns: Dict[str,int] = {}; top_threats = [] for r in records: ident = _rec_get(r,"email") or _rec_get(r,"username") if ident: identities.add(ident) if HVTAnalyzer.is_hvt(r): hvt_list.append(ident) if _is_stealer(r): stealers += 1 rs = float(_rec_get(r,"risk_score") or 0) if rs >= 90: buckets["Critical"] += 1 elif rs >= 70: buckets["High"] += 1 elif rs >= 40: buckets["Medium"] += 1 elif rs >= 10: buckets["Low"] += 1 else: buckets["Info"] += 1 pw = _rec_get(r,"password") if pw: if re.search(r"[A-Z]",pw) and re.search(r"\d",pw) and re.search(r"[!@#$%^&*]",pw): pat = "Complex" elif _CORP_PW_RE.match(pw): pat = "Corporate (Word+Year+Symbol)" elif pw.isdigit(): pat = "Numeric only" elif pw.isalpha(): pat = "Alpha only" else: pat = "Other" pw_patterns[pat] = pw_patterns.get(pat,0) + 1 if rs >= 70: top_threats.append(r) top_threats.sort(key=lambda r: float(_rec_get(r,"risk_score") or 0), reverse=True) return {"total_identities":len(identities),"total_records":len(records),"hvt_list":list(dict.fromkeys(hvt_list))[:30],"hvt_count":len(set(hvt_list)),"stealer_count":stealers,"buckets":buckets,"pw_patterns":sorted(pw_patterns.items(),key=lambda x:-x[1])[:8],"top_threats":top_threats[:20]} @staticmethod def _heatmap_bar(value: float, max_val: int = 100) -> str: pct = min(100, int(value / max(max_val,1) * 100)) colour = "#ff0040" if pct >= 90 else "#ff6600" if pct >= 70 else "#ffcc00" if pct >= 40 else "#00cc44" return (f'

' f'

' f'{value:.1f}') @staticmethod def to_html(data: dict, path: str) -> None: records = data.get("records",[]) target = data.get("target","Unknown") s = AdvancedReporter._build_summary(records) rec_dicts = [r.to_dict() if hasattr(r,"to_dict") else r for r in records] kpi_html = (f'

{s["total_identities"]}

COMPROMISED IDENTITIES

' f'

{s["stealer_count"]}

STEALER LOGS

' f'

{s["hvt_count"]}

HIGH-VALUE TARGETS

' f'

{s["total_records"]}

TOTAL RECORDS

' f'

{len(data.get("discovered_assets") or [])}

REINJECTED ASSETS

') total = max(sum(s["buckets"].values()),1) heatmap_rows = "".join(f'{lvl}{AdvancedReporter._heatmap_bar(cnt,total)}{cnt}' for lvl,cnt in s["buckets"].items()) pw_rows = "".join(f'{p}{c}{AdvancedReporter._heatmap_bar(c,max((c2 for _,c2 in s["pw_patterns"]),default=1))}' for p,c in s["pw_patterns"]) _sp = AdvancedReporter.sanitize_payload # shorthand threat_rows = "".join( f'' f'{_sp(_rec_get(r,"email") or _rec_get(r,"username"))}' f'{_sp(_rec_get(r,"password") or "")}' f'{_sp(_rec_get(r,"password_hash") or "")[:30]}' f'{_sp(_rec_get(r,"ip_address") or "")}' f'{_sp(_rec_get(r,"phone") or "")}' f'{_sp(_rec_get(r,"domain") or "")}' f'{_sp(_rec_get(r,"source"))}' f'{_sp(_rec_get(r,"breach_date"))}' f'{AdvancedReporter._heatmap_bar(float(_rec_get(r,"risk_score") or 0))}' f'{"⚑ HVT" if HVTAnalyzer.is_hvt(r) else ""}' for r in s["top_threats"] ) hvt_items = "".join(f'

⚠ {_sp(v)}

' for v in s["hvt_list"]) or "

None detected

" cred_rows = "" for r in rec_dicts[:500]: rs = float(r.get("risk_score",0) if isinstance(r,dict) else getattr(r,"risk_score",0)) cls = "crit" if rs>=90 else "high" if rs>=70 else "med" if rs>=40 else "" hvt_badge = "⚑" if HVTAnalyzer.is_hvt(r) else "" cred_rows += ( f"" f"{_sp(_rec_get(r,'email'))}{hvt_badge}" f"{_sp(_rec_get(r,'username') or '')}" f"{_sp(_rec_get(r,'password') or '')}" f"{_sp((_rec_get(r,'password_hash') or '')[:30])}" f"{_sp(_rec_get(r,'ip_address') or '')}" f"{_sp(_rec_get(r,'phone') or '')}" f"{_sp(_rec_get(r,'domain') or '')}" f"{_sp(_rec_get(r,'source'))}" f"{_sp(_rec_get(r,'breach_date'))}" f"{AdvancedReporter._heatmap_bar(rs)}" ) # ── Discovered documents section ────────────────────────────── doc_rows = "" for r in records: src = _rec_get(r, "source") if src != "DorkingEngine": continue rd = r if isinstance(r, dict) else r.raw_data if hasattr(r, "raw_data") else {} meta = (r.metadata if hasattr(r, "metadata") else {}) or {} url = rd.get("url", "") if isinstance(rd, dict) else "" ext = url.lower().rsplit(".", 1)[-1].split("?")[0] if "." in url else "" paths = "; ".join(meta.get("local_paths", [])) emails = "; ".join(meta.get("emails", [])) doc_rows += ( f"" f"{_sp(url[:80])}" f"{_sp(ext)}" f"{_sp(meta.get('author',''))}" f"{_sp(meta.get('creator',''))}" f"{_sp(paths)}" f"{_sp(emails)}" ) doc_section = (f'

🔍 Discovered Public Documents & Metadata

' f'' f'{doc_rows if doc_rows else ""}

URL	Type	Author	Creator	Local Paths	Emails
No documents found

' ) # ── Dork hits section ───────────────────────────────────────── dork_results = data.get("dork_results", []) or [] dork_hit_rows = "" for h in dork_results: url = h.get("url", "") title = h.get("title", "") snippet = h.get("snippet", "") dork_q = h.get("dork", "") engine = h.get("engine", "") link = f'{_sp(url[:90])}' if url else _sp(title[:90]) dork_hit_rows += ( f"" f"{link}" f"{_sp(snippet[:120])}" f"{_sp(dork_q[:80])}" f"{_sp(engine)}" f"" ) dork_section = ( f'

🔎 Dork Results ({len(dork_results)} hits)

' f'' f'{dork_hit_rows if dork_hit_rows else ""}

URL / Title	Snippet	Dork Query	Engine
No dork hits

' ) # ── Scrape section ──────────────────────────────────────────── scrape_results = data.get("scrape_results", {}) or {} # Pastes paste_rows = "" for p in scrape_results.get("pastes", []): site = _sp(p.get("site", "")) pid = p.get("id", "") title = _sp(p.get("title", pid)[:80]) query = _sp(p.get("query", "")[:60]) # Build a best-effort direct link paste_links = { "Pastebin": f"https://pastebin.com/{pid}", "Rentry": f"https://rentry.co/{pid}", "Hastebin": f"https://hastebin.com/{pid}", "DPaste": f"https://dpaste.org/{pid}", "Ghostbin": f"https://ghostbin.com/paste/{pid}", "JustPaste":f"https://justpaste.it/{pid}", "ControlC": f"https://controlc.com/{pid}", "Paste2": f"https://paste2.org/raw/{pid}", "PastebinPro": f"https://pastebin.com/{pid}", } link_url = paste_links.get(p.get("site", ""), "") link_html = (f'{title or pid}' if link_url else (title or _sp(pid))) patterns = p.get("patterns", {}) pat_str = _sp(", ".join(f"{k}({len(v)})" for k, v in patterns.items()) if patterns else "") paste_rows += f"{site}{link_html}{pat_str}{query}" # Credentials extracted from pastes cred_scrape_rows = "" for c in scrape_results.get("credentials", []): raw = _sp(c.get("raw", "")[:120]) src = _sp(c.get("source", "")) pid = c.get("paste_id", "") cred_scrape_rows += f"{raw}{src}{_sp(pid)}" # Telegram hits tg_rows = "" for t in scrape_results.get("telegram", []): ch = _sp(t.get("channel", "")) text = _sp(t.get("text", "")[:200]) pats = _sp(", ".join(f"{k}({len(v)})" for k, v in (t.get("patterns") or {}).items())) link = f't.me/s/{ch}' tg_rows += f"{link}{text}{pats}" # Misconfig dork hits mc_rows = "" for m in scrape_results.get("dork_misconfigs", []): url_m = m.get("url", "") title_m = _sp(m.get("title", "")[:80]) dork_m = _sp(m.get("dork", "")[:80]) link_m = (f'{_sp(url_m[:80])}' if url_m else title_m) mc_rows += f"{link_m}{title_m}{dork_m}" scrape_section = ( f'

📋 Scrape Results

' f'

Pastes ({len(scrape_results.get("pastes",[]))})

' f'' f'{paste_rows or ""}

Site	Paste / Link	Patterns Found	Query
None

' f'

Extracted Credentials ({len(scrape_results.get("credentials",[]))})

' f'' f'{cred_scrape_rows or ""}

Raw Credential	Source	Paste ID
None

' f'

Telegram CTI ({len(scrape_results.get("telegram",[]))})

' f'' f'{tg_rows or ""}

Channel	Message	Patterns
None

' f'

Misconfigurations ({len(scrape_results.get("dork_misconfigs",[]))})

' f'' f'{mc_rows or ""}

URL	Title	Dork
None

' f'

' ) css = ("*{margin:0;padding:0;box-sizing:border-box}body{font-family:'Courier New',monospace;background:#0a0a0a;color:#e0e0e0;padding:20px}.header{text-align:center;padding:30px;border:1px solid #333;margin-bottom:20px;background:#111}.header h1{color:#00ff41;font-size:28px;letter-spacing:4px}.header p{color:#888;margin-top:6px}.stats{display:grid;grid-template-columns:repeat(auto-fit,minmax(180px,1fr));gap:12px;margin:15px 0}.stat{background:#111;border:1px solid #333;padding:18px;text-align:center}.stat .num{font-size:32px;font-weight:bold;color:#00ff41}.stat .label{color:#888;font-size:11px;margin-top:4px}.stat.crit .num{color:#ff0040}.stat.hvt .num{color:#ff6600}.section{margin:20px 0}.section h2{color:#00ff41;border-bottom:1px solid #333;padding-bottom:6px;margin-bottom:12px}.section h3{color:#aaa;margin:12px 0 6px}table{width:100%;border-collapse:collapse}th,td{padding:8px;border:1px solid #222;font-size:12px;word-break:break-all}th{background:#1a1a1a;color:#00ff41;text-transform:uppercase;font-size:11px}td{background:#0d0d0d}tr.crit td{background:#1a0005}tr.high td{background:#1a0a00}tr.med td{background:#1a1500}.pw{color:#ff0040;font-weight:bold}.hvt-box{background:#1a0a00;border:1px solid #ff6600;padding:12px;margin:10px 0}.hvt-box ul{padding-left:20px;color:#ff6600}.pivot-node{margin:4px 0;padding:6px 10px;border-left:2px solid #333;background:#0d0d0d}.pivot-seed{border-left-color:#00ff41}.pivot-pivot{border-left-color:#00ccff}.pivot-crack{border-left-color:#cc00ff}.pivot-asset{color:#00ccff;font-weight:bold}.pivot-stats{color:#888;font-size:11px;margin-top:3px}.pivot-children{margin-left:20px;border-left:1px solid #222;padding-left:8px}") # ── Pivot Tree HTML section ─────────────────────────────────── pivot_log = data.get("pivot_log", []) or [] if pivot_log: log_by_key_html = {e["asset"].lower(): e for e in pivot_log} def _build_pivot_html(entries: list) -> str: html = "" for e in entries: found_in = e.get("found_in", e.get("source", "?")) src_color = {"seed": "#00ff41", "breach": "#ff0040", "dork": "#ff6600", "scrape": "#cc00ff", "hash_crack": "#cc00ff", "pivot": "#00ccff"}.get(found_in, "#888") stats_parts = [] if e["records"]: stats_parts.append(f'{e["records"]} breach') if e["dorks"]: stats_parts.append(f'{e["dorks"]} dork') if e["scrape"]: stats_parts.append(f'{e["scrape"]} scrape') if e.get("cracked"): stats_parts.append(f'cracked→{_sp(", ".join(e["cracked"][:2]))}') # Children with phase+ref children = e.get("children", []) child_html_inner = "" if children: _phase_colors_html = {"breach": "#ff0040", "dork": "#ff6600", "scrape": "#cc00ff", "hash_crack": "#cc00ff"} child_html_inner = '

↳ reinjected: ' parts_ch = [] for ch in children[:6]: ph = ch.get("found_in", "?") col = _phase_colors_html.get(ph, "#888") parts_ch.append( f'[{_sp(ph)}] {_sp(ch.get("asset",""))}' ) child_html_inner += ", ".join(parts_ch) if len(children) > 6: child_html_inner += f" +{len(children)-6} more" child_html_inner += "

" # Recurse into processed children child_log_entries = [log_by_key_html[ch["asset"].lower()] for ch in children if ch.get("asset","").lower() in log_by_key_html] child_tree = _build_pivot_html(child_log_entries) if child_log_entries else "" html += ( f'

' f'[{found_in.upper()}] ' f'{_sp(e["asset"])} ' f'({_sp(e["qtype"])})' + (f' ← {_sp(e["parent"])}' if e.get("parent") else "") + (f'

{" | ".join(stats_parts)}

' if stats_parts else "") + child_html_inner + (f'

{child_tree}

' if child_tree else "") + '

' ) return html roots_html = [e for e in pivot_log if e["depth"] == 0] pivot_tree_html = _build_pivot_html(roots_html) pivot_section = ( f'

🔄 Pivot Tree ({len(pivot_log)} nodes)

' f'{pivot_tree_html}

' ) else: pivot_section = "" # ── Discovered Assets section ───────────────────────────────── discovered_assets = data.get("discovered_assets", []) or [] _phase_badge_colors = { "breach": "#ff0040", "dork": "#ff6600", "scrape": "#cc00ff", "hash_crack": "#cc00ff", "seed": "#00ff41", } da_rows = "" for da in discovered_assets: phase = da.get("phase", "?") ref = da.get("ref", "") ref_html = (f'{_sp(ref[:80])}' if ref.startswith("http") else _sp(ref[:100])) badge_col = _phase_badge_colors.get(phase, "#888") da_rows += ( f"" f"{_sp(da.get('asset',''))}" f"{_sp(da.get('qtype',''))}" f"{_sp(phase.upper())}" f"{ref_html}" f"{_sp(da.get('parent',''))}" f"{da.get('depth',0)}" f"" ) discovered_section = ( f'

🔎 Discovered Assets ({len(discovered_assets)} new identifiers reinjected)

' f'' f'{da_rows if da_rows else ""}

Asset	Type	Phase	Reference (Source / URL / Paste)	Discovered From	Depth
No pivot assets discovered

' ) page = (f'NOX Framework — {_sp(target)}' f'' f'

📋 Executive Summary

{kpi_html}

' f'

Risk Heatmap

{heatmap_rows}

Level	Distribution	#

' f'

Password Patterns

{pw_rows}

Pattern	Count	Prevalence

' f'

⚠ High-Value Targets ({s["hvt_count"]})

{hvt_items}

' f'

🚨 Top Threats

{threat_rows}

Identity	Password	Hash	IP	Phone	Domain	Source	Date	Risk	Flag

' f'{pivot_section}' f'{discovered_section}' f'{doc_section}' f'{dork_section}' f'{scrape_section}' f'

Credential Records (top 500)

{cred_rows}

Email	Username	Password	Hash	IP	Phone	Domain	Source	Date	Risk

' f'') with open(path, "w", encoding="utf-8") as fh: fh.write(page) out("ok", f"HTML report saved: {path}") @staticmethod def to_markdown(data: dict, path: str) -> None: records = data.get("records",[]) target = data.get("target","Unknown") s = AdvancedReporter._build_summary(records) ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S") _r = AdvancedReporter._raw # strip control chars, no HTML escaping for markdown lines = ["# NOX Framework Report","",f"**Target:** `{_r(target)}` ",f"**Generated:** {ts} ",f"**Version:** {VERSION}","","---","## Executive Summary","","| Metric | Value |","|--------|-------|",f"| Compromised Identities | **{s['total_identities']}** |",f"| Total Records | **{s['total_records']}** |",f"| Stealer Logs | **{s['stealer_count']}** |",f"| High-Value Targets | **{s['hvt_count']}** |","","### Risk Distribution","","| Level | Count |","|-------|-------|"] for lvl, cnt in s["buckets"].items(): if cnt: lines.append(f"| {lvl} | {cnt} |") lines += ["","### Password Patterns","","| Pattern | Count |","|---------|-------|"] for p, c in s["pw_patterns"]: lines.append(f"| {p} | {c} |") if s["hvt_list"]: lines += ["","### ⚠ High-Value Targets",""] for v in s["hvt_list"]: lines.append(f"- `{_r(v)}`") lines += ["","---","## Top Threats","","| Identity | Password | Hash | IP | Phone | Domain | Source | Date | Risk |","|----------|----------|------|----|-------|--------|--------|------|------|"] for r in s["top_threats"]: hvt = " ⚑" if HVTAnalyzer.is_hvt(r) else "" lines.append( f"| {_r(_rec_get(r,'email') or _rec_get(r,'username'))}{hvt}" f" | {_r(_rec_get(r,'password'))}" f" | {_r((_rec_get(r,'password_hash') or '')[:20])}" f" | {_r(_rec_get(r,'ip_address') or '')}" f" | {_r(_rec_get(r,'phone') or '')}" f" | {_r(_rec_get(r,'domain') or '')}" f" | {_r(_rec_get(r,'source'))}" f" | {_r(_rec_get(r,'breach_date'))}" f" | {_rec_get(r,'risk_score')} |" ) lines += ["","---","## Records (top 200)","","| Email | Username | Password | Hash | IP | Phone | Domain | Source | Date | Risk |","|-------|----------|----------|------|----|-------|--------|--------|------|------|"] for r in records[:200]: lines.append( f"| {_r(_rec_get(r,'email'))}" f" | {_r(_rec_get(r,'username') or '')}" f" | {_r(_rec_get(r,'password') or '')}" f" | {_r((_rec_get(r,'password_hash') or '')[:20])}" f" | {_r(_rec_get(r,'ip_address') or '')}" f" | {_r(_rec_get(r,'phone') or '')}" f" | {_r(_rec_get(r,'domain') or '')}" f" | {_r(_rec_get(r,'source'))}" f" | {_r(_rec_get(r,'breach_date'))}" f" | {_rec_get(r,'risk_score')} |" ) # ── Dork results ────────────────────────────────────────────── dork_results = data.get("dork_results", []) or [] lines += ["","---",f"## Dork Results ({len(dork_results)} hits)",""] if dork_results: lines += ["| URL / Title | Snippet | Dork Query | Engine |","|-------------|---------|------------|--------|"] for h in dork_results: url = _r(h.get("url", h.get("title", ""))) snippet = _r(h.get("snippet", "")[:100]) dork_q = _r(h.get("dork", "")[:80]) engine = _r(h.get("engine", "")) link = f"[{url[:80]}]({url})" if url.startswith("http") else url[:80] lines.append(f"| {link} | {snippet} | {dork_q} | {engine} |") else: lines.append("_No dork hits._") # ── Scrape results ──────────────────────────────────────────── scrape_results = data.get("scrape_results", {}) or {} pastes = scrape_results.get("pastes", []) lines += ["","---",f"## Scrape — Pastes ({len(pastes)})",""] if pastes: lines += ["| Site | Paste / Link | Patterns |","|------|-------------|----------|"] paste_links = { "Pastebin": "https://pastebin.com/{}", "Rentry": "https://rentry.co/{}", "Hastebin": "https://hastebin.com/{}", "DPaste": "https://dpaste.org/{}", "Ghostbin": "https://ghostbin.com/paste/{}", "JustPaste":"https://justpaste.it/{}", "ControlC": "https://controlc.com/{}", "Paste2": "https://paste2.org/raw/{}", "PastebinPro": "https://pastebin.com/{}", } for p in pastes: site = _r(p.get("site", "")) pid = p.get("id", "") title = _r(p.get("title", pid)[:60]) tmpl = paste_links.get(p.get("site", ""), "") link = f"[{title or pid}]({tmpl.format(pid)})" if tmpl and pid else (title or _r(pid)) pats = _r(", ".join(f"{k}({len(v)})" for k, v in (p.get("patterns") or {}).items())) lines.append(f"| {site} | {link} | {pats} |") else: lines.append("_No pastes found._") creds_scraped = scrape_results.get("credentials", []) lines += ["","---",f"## Scrape — Extracted Credentials ({len(creds_scraped)})",""] if creds_scraped: lines += ["| Raw Credential | Source | Paste ID |","|----------------|--------|----------|"] for c in creds_scraped: lines.append(f"| `{_r(c.get('raw','')[:100])}` | {_r(c.get('source',''))} | {_r(c.get('paste_id',''))} |") else: lines.append("_No credentials extracted._") tg_hits = scrape_results.get("telegram", []) lines += ["","---",f"## Scrape — Telegram CTI ({len(tg_hits)})",""] if tg_hits: lines += ["| Channel | Message (excerpt) | Patterns |","|---------|-------------------|----------|"] for t in tg_hits: ch = _r(t.get("channel", "")) text = _r(t.get("text", "")[:150]) pats = _r(", ".join(f"{k}({len(v)})" for k, v in (t.get("patterns") or {}).items())) link = f"[t.me/s/{ch}](https://t.me/s/{ch})" lines.append(f"| {link} | {text} | {pats} |") else: lines.append("_No Telegram hits._") mc_hits = scrape_results.get("dork_misconfigs", []) lines += ["","---",f"## Scrape — Misconfigurations ({len(mc_hits)})",""] if mc_hits: lines += ["| URL | Title | Dork |","|-----|-------|------|"] for m in mc_hits: url_m = _r(m.get("url", "")) title_m = _r(m.get("title", "")[:60]) dork_m = _r(m.get("dork", "")[:60]) link_m = f"[{url_m[:60]}]({url_m})" if url_m.startswith("http") else url_m[:60] lines.append(f"| {link_m} | {title_m} | {dork_m} |") else: lines.append("_No misconfigurations found._") # ── Pivot Tree ──────────────────────────────────────────────── pivot_log = data.get("pivot_log", []) or [] if pivot_log: lines += ["","---",f"## Pivot Tree ({len(pivot_log)} nodes)","", "| Depth | Asset | Type | Found In | Parent | Breach | Dorks | Scrape | Children | Cracked |", "|-------|-------|------|----------|--------|--------|-------|--------|----------|---------|"] for e in sorted(pivot_log, key=lambda x: (x.get("depth", 0), x.get("parent") or "", x.get("asset", ""))): cracked_str = _r(", ".join(e.get("cracked", [])[:3])) children = e.get("children", []) children_str = _r(", ".join( f"{ch.get('asset','')}[{ch.get('found_in','?')}]" for ch in children[:4] )) if len(children) > 4: children_str += f" +{len(children)-4}" lines.append( f"| {e['depth']}" f" | `{_r(e['asset'])}`" f" | {_r(e['qtype'])}" f" | {_r(e.get('found_in', e.get('source','?')))}" f" | {_r(e.get('parent') or '')}" f" | {e['records']}" f" | {e['dorks']}" f" | {e['scrape']}" f" | {children_str}" f" | {cracked_str} |" ) # ── Discovered Assets ───────────────────────────────────────── discovered_assets = data.get("discovered_assets", []) or [] lines += ["","---",f"## Discovered Assets ({len(discovered_assets)} new identifiers reinjected)",""] if discovered_assets: lines += ["| Asset | Type | Phase | Reference (Source / URL / Paste) | Discovered From | Depth |", "|-------|------|-------|----------------------------------|-----------------|-------|"] for da in discovered_assets: ref = _r(da.get("ref", "")) link = f"[{ref[:70]}]({ref})" if ref.startswith("http") else ref[:80] lines.append( f"| `{_r(da.get('asset',''))}`" f" | {_r(da.get('qtype',''))}" f" | **{_r(da.get('phase','?')).upper()}**" f" | {link}" f" | {_r(da.get('parent',''))}" f" | {da.get('depth',0)} |" ) else: lines.append("_No pivot assets discovered._") with open(path, "w", encoding="utf-8") as fh: fh.write("\n".join(lines) + "\n") out("ok", f"Markdown saved: {path}") # ======================================================================= # REPORTER FACADE # ======================================================================= class Reporter: @staticmethod def _resolve_path(path: str, fmt: str) -> str: """If path is not absolute, place it under REPORT_DIR.""" p = Path(path) if not p.is_absolute(): p = REPORT_DIR / p return str(p) @staticmethod def to_json(data: dict, path: str) -> None: path = Reporter._resolve_path(path, "json") if _HAS_REPORTING: _rep_json(data, path); return def ser(o): if isinstance(o, (Severity, Enum)): return o.name if isinstance(o, Record): return o.to_dict() return str(o) with open(path, "w") as f: json.dump(data, f, indent=2, default=ser) out("ok", f"JSON report saved: {path}") @staticmethod def to_csv(records: list, path: str) -> None: path = Reporter._resolve_path(path, "csv") if not records: return fields = ["email","password","password_hash","username","domain","ip_address","phone","breach_name","breach_date","severity","risk_score","is_hvt","data_types","persistence_score"] with open(path, "w", newline="", encoding="utf-8") as f: w = csv.DictWriter(f, fieldnames=fields, extrasaction="ignore") w.writeheader() for r in records: row = dict(r) if isinstance(r,dict) else r.to_dict() if isinstance(row.get("severity"), Severity): row["severity"] = row["severity"].name if isinstance(row.get("data_types"), list): row["data_types"] = ", ".join(row["data_types"]) w.writerow(row) out("ok", f"CSV saved: {path}") @staticmethod def to_html(data: dict, path: str) -> None: path = Reporter._resolve_path(path, "html") if _HAS_REPORTING: _rep_html(data, path); return AdvancedReporter.to_html(data, path) @staticmethod def to_markdown(data: dict, path: str) -> None: path = Reporter._resolve_path(path, "md") AdvancedReporter.to_markdown(data, path) @staticmethod def to_pdf(data: dict, path: str, investigator_id: str = "NOX-AUTO") -> None: path = Reporter._resolve_path(path, "pdf") if _HAS_REPORTING: try: _rep_pdf(data, path, investigator_id=investigator_id) except RuntimeError as e: out("err", str(e)) return # ForensicReporter (fpdf2, full forensic layout) — primary path try: import fpdf as _fpdf_check; del _fpdf_check # noqa: F401 ForensicReporter.generate(data, path, investigator_id=investigator_id) return except ImportError: pass # Fallback: weasyprint HTML→PDF if not weasyprint: out("err", "No PDF library found. Install fpdf2: pip install fpdf2") return tmp = tempfile.NamedTemporaryFile(suffix=".html", delete=False) tmp_name = tmp.name tmp.close() try: AdvancedReporter.to_html(data, tmp_name) weasyprint(tmp_name).write_pdf(path) out("ok", f"PDF saved: {path}") finally: try: os.unlink(tmp_name) except OSError: pass # ======================================================================= # INTERACTIVE REPL # ======================================================================= class REPL: def __init__(self) -> None: self.config = NoxConfig() self.db = NoxDB() self.orc = Orchestrator(self.config, self.db) self._last = None self._last_full = None # Investigation session state self.session_state: Dict[str, Any] = { "investigator_id": os.environ.get("NOX_INVESTIGATOR_ID", "NOX-AUTO"), "targets_scanned": [], "pivot_chain": [], } self._menu_items = [ ("autoscan", "Full scan + pivot + dork + scrape + analyze"), ("scan", "Quick breach intelligence scan"), ("dork", "Google dorking for leaked data"), ("scrape", "Deep paste/web scraping + Telegram indexing"), ("crack", "Identify and crack a hash"), ("analyze", "Deep password strength analysis"), ("graph", "Forensic graph of last scan"), ("visualize", "ASCII relationship map (Target → Data → Pivots)"), ("pivot ", "Re-scan using result #n as new seed"), ("search ", "Filter in-memory records by keyword"), ("sources", "List loaded plugins with input_type, confidence, key status"), ("export", "Export last results as HTML (or: export json/csv/md/pdf)"), ("tor", "Toggle Tor routing"), ("proxy", "Set proxy"), ("config", "Configure threads/timeout"), ("clear", "Clear screen"), ("help", "Show this help"), ("quit", "Exit NOX"), ] def _show_menu(self) -> None: print(f"\n {C.G}NOX Interactive Menu:{C.W}") for i, (cmd, desc) in enumerate(self._menu_items, 1): print(f" {C.Y}{i:2}.{C.W} {cmd:<12} - {desc}") print() def run(self) -> None: self._banner() self._show_menu() while True: try: raw = input(f"\n{C.G}nox{C.W}> ").strip() if not raw: continue if raw.isdigit(): num = int(raw) if 1 <= num <= len(self._menu_items): cmd_full = self._menu_items[num-1][0] cmd = cmd_full.split()[0] # strip any suffix # Commands that need a target/argument prompt _needs_arg = {"autoscan","scan","dork","scrape","crack","analyze", "export","config","proxy","pivot","search"} if cmd in _needs_arg: if cmd == "crack": arg = input(f" {C.DM}Hash: {C.W}").strip() elif cmd == "analyze": arg = input(f" {C.DM}Password: {C.W}").strip() elif cmd in ("config", "proxy"): arg = input(f" {C.DM}Argument: {C.W}").strip() elif cmd in ("pivot", "search"): arg = input(f" {C.DM}Argument: {C.W}").strip() elif cmd == "export": arg = input(f" {C.DM}Format [html/json/csv/md/pdf]: {C.W}").strip() or "html" else: arg = input(f" {C.DM}Target: {C.W}").strip() else: arg = "" self._dispatch(cmd, arg) else: out("warn", f"Invalid number: {num}") else: parts = raw.split(None, 1) cmd = parts[0].lower() arg = parts[1] if len(parts) > 1 else "" self._dispatch(cmd, arg) except KeyboardInterrupt: print() out("info", "Interrupted. Type 'quit' to exit.") except EOFError: break except Exception as e: out("err", f"Error: {e}") def _dispatch(self, cmd: str, arg: str) -> None: if cmd in ("quit","exit","q"): out("info", "Exiting.") try: self.db.close() except Exception: pass sys.exit(0) elif cmd in ("help","h","?"): self._help() elif cmd == "autoscan": self._fullscan(arg or input(f" {C.DM}Target: {C.W}").strip()) elif cmd == "scan": self._scan(arg or input(f" {C.DM}Target: {C.W}").strip()) elif cmd == "dork": self._dork(arg or input(f" {C.DM}Target: {C.W}").strip()) elif cmd == "scrape": self._scrape(arg or input(f" {C.DM}Target: {C.W}").strip()) elif cmd == "crack": self._crack(arg or input(f" {C.DM}Hash: {C.W}").strip()) elif cmd == "analyze": self._analyze(arg or input(f" {C.DM}Password: {C.W}").strip()) elif cmd in ("sources", "list-sources"): self._sources() elif cmd == "export": self._export(arg) elif cmd == "tor": self._tor() elif cmd == "proxy": self._proxy(arg) elif cmd == "config": self._config(arg) elif cmd == "graph": self._graph() elif cmd in ("visualize", "vis"): self._visualize() elif cmd == "pivot": self._pivot(arg) elif cmd == "search": self._search(arg or input(f" {C.DM}Query: {C.W}").strip()) elif cmd == "clear": os.system("clear" if os.name != "nt" else "cls") elif cmd == "menu": self._show_menu() elif cmd == "banner": self._banner() else: out("warn", f"Unknown command: {cmd}. Type 'help' or 'menu' for options.") def _banner(self) -> None: opsec_proxy = self.config.proxy or self.config.use_tor if opsec_proxy: opsec_label = f"{C.G}[OPSEC: PROTECTED]{C.X}" elif getattr(self.config, "allow_leak", False): opsec_label = f"{C.R}[OPSEC: UNPROTECTED]{C.X}" else: opsec_label = f"{C.Y}[OPSEC: GUARDIAN]{C.X}" print(f""" {C.G} ███╗ ██╗ ██████╗ ██╗ ██╗ ████╗ ██║██╔═══██╗╚██╗██╔╝ ██╔██╗ ██║██║ ██║ ╚███╔╝ ██║╚██╗██║██║ ██║ ██╔██╗ ██║ ╚████║╚██████╔╝██╔╝ ██╗ ╚═╝ ╚═══╝ ╚═════╝ ╚═╝ ╚═╝ {C.W} Cyber Threat Intelligence Framework {C.Y}v{VERSION}{C.W} {C.DM}120+ JSON plugin sources | Async Core | Pivot Engine | JA3 TLS | HVT Detection{C.W} {opsec_label} """) def _help(self) -> None: self._show_menu() out("info", "\nYou can also type commands directly (e.g., 'scan user@example.com').") def _scan(self, arg: str) -> None: if not arg: out("warn","No target specified."); return self._last = self.orc.scan(arg) analysis = CredAnalyzer.analyze(self._last) HVTAnalyzer.annotate(self._last) hvt_records = HVTAnalyzer.filter_hvt(self._last) prev = self._last_full or {} self._last_full = { "target": arg, "records": self._last, "analysis": analysis, "hvt_records": hvt_records, "dork_results": prev.get("dork_results", []), "scrape_results": prev.get("scrape_results", {}), "pivot_chain": [arg], "pivot_log": [], "discovered_assets": [], "scan_meta": {"pivot_depth": 0, "nodes_discovered": len(self._last)}, } self.session_state["targets_scanned"].append(arg) W = 62 rs = analysis.get("risk_score", 0) sev = analysis.get("severity", {}) col = C.R if rs > 60 else C.Y if rs > 30 else C.G badge = (f"{C.R}[CRITICAL]{C.X}" if rs > 60 or sev.get("critical", 0) > 0 else f"{C.Y}[HIGH]{C.X}" if rs > 30 or sev.get("high", 0) > 0 else f"{C.G}[MEDIUM]{C.X}") print(f"\n {C.G}{'━'*W}{C.X}") print(f" {C.G} BREACH SCAN RESULTS{C.X} {badge}") print(f" {C.DM} Target: {arg}{C.X}") print(f" {C.G}{'━'*W}{C.X}") # ── Stats grid ──────────────────────────────────────────────── total = analysis.get("total_records", 0) unique = analysis.get("unique_records", total) emails = analysis.get("unique_emails", 0) pw_cnt = analysis.get("passwords_found", 0) stealer = analysis.get("stealer_logs", 0) hvt_cnt = analysis.get("hvt_count", 0) reused = len(analysis.get("reused_passwords", {})) print(f"\n {'Records':<26} {total} {C.DM}({unique} unique){C.X}") print(f" {'Unique Emails':<26} {emails}") print(f" {'Passwords Exposed':<26} {C.R}{pw_cnt}{C.X}") print(f" {'Stealer Logs':<26} {C.R}{stealer}{C.X}") print(f" {'High-Value Targets':<26} {C.O}{hvt_cnt}{C.X}") print(f" {'Password Reuse':<26} {C.Y if reused else C.DM}{reused} password(s) reused{C.X}") print(f" {'Risk Score':<26} {col}{rs}/100{C.X}") print(f" {'Severity':<26} " f"{C.R}{sev.get('critical',0)} CRIT{C.X} " f"{C.Y}{sev.get('high',0)} HIGH{C.X} " f"{sev.get('medium',0)} MED " f"{C.DM}{sev.get('low',0)} LOW{C.X}") # ── Top exposed credentials ─────────────────────────────────── creds = [(r, _rec_get(r, "password")) for r in self._last if _rec_get(r, "password")] if creds: print(f"\n {C.Y}┌─ TOP EXPOSED CREDENTIALS ({len(creds)} total) {'─'*(W-38)}┐{C.X}") for r, pw in creds[:8]: em = (_rec_get(r, "email") or _rec_get(r, "username") or "—")[:38] src = _rec_get(r, "source") or "" breach = _rec_get(r, "breach_name") or "" rs_r = _rec_get(r, "risk_score") or 0 rc = C.R if float(rs_r) >= 70 else C.Y if float(rs_r) >= 40 else C.W masked = pw[:2] + "●" * min(len(pw) - 2, 8) if len(pw) > 2 else "●●●●" ref_tag = f" {C.DM}[{breach or src}]{C.X}" if (breach or src) else "" print(f" {C.Y}│{C.X} {C.CY}{em:<38}{C.X} {rc}{masked:<12}{C.X} {rc}risk:{rs_r}{C.X}{ref_tag}") extra = self._record_assets(r) if extra: print(f" {C.Y}│{C.X} {extra}") if len(creds) > 8: print(f" {C.Y}│{C.X} {C.DM}… and {len(creds)-8} more — use 'export' for the full list{C.X}") print(f" {C.Y}└{'─'*(W-2)}┘{C.X}") # ── Non-credential assets (IPs, phones, domains, usernames, hashes) ── other = [r for r in self._last if not _rec_get(r, "password")] if other: print(f"\n {C.B}┌─ DISCOVERED ASSETS ({len(other)}) {'─'*(W-22)}┐{C.X}") for r in other[:10]: ident = _rec_get(r, "email") or _rec_get(r, "username") or _rec_get(r, "ip_address") or _rec_get(r, "domain") or "—" src = _rec_get(r, "source") or "" breach = _rec_get(r, "breach_name") or "" rs_r = _rec_get(r, "risk_score") or 0 ref = breach or src print(f" {C.B}│{C.X} {C.CY}{ident:<38}{C.X} {C.DM}risk:{rs_r} [{ref[:22]}]{C.X}") extra = self._record_assets(r) if extra: print(f" {C.B}│{C.X} {extra}") if len(other) > 10: print(f" {C.B}│{C.X} {C.DM}… and {len(other)-10} more — use 'export' for the full list{C.X}") print(f" {C.B}└{'─'*(W-2)}┘{C.X}") # ── HVT alert ───────────────────────────────────────────────── hvt = [r for r in self._last if HVTAnalyzer.is_hvt(r)] if hvt: print(f"\n {C.O}⚑ HIGH-VALUE TARGETS ({len(hvt)}){C.X}") for r in hvt[:5]: ident = _rec_get(r, "email") or _rec_get(r, "username") or "—" rs_r = _rec_get(r, "risk_score") or "" print(f" {C.O}→{C.X} {ident:<45} {C.Y}risk: {rs_r}{C.X}") if len(hvt) > 5: print(f" {C.DM} … and {len(hvt)-5} more{C.X}") # ── Password reuse ──────────────────────────────────────────── reused_map = analysis.get("reused_passwords", {}) if reused_map: print(f"\n {C.R}⚠ PASSWORD REUSE DETECTED{C.X}") for pw, cnt in list(reused_map.items())[:4]: masked = pw[:2] + "●" * (len(pw) - 2) if len(pw) > 2 else "●●●●" print(f" {C.R}→{C.X} {masked} reused {cnt}× across breaches") print(f"\n {C.G}{'━'*W}{C.X}") print(f" {C.DM}Use 'graph' for full report | 'export pdf/html/json' for forensic output{C.X}\n") def _fullscan(self, arg: str) -> None: if not arg: out("warn","No target specified."); return out("info", f"[autoscan] Starting full scan + pivot + dork + scrape for: {arg}") # Seed the pivot chain immediately so it's visible even if the scan fails if arg not in self.session_state["pivot_chain"]: self.session_state["pivot_chain"].append(arg) result = {"target": arg, "records": [], "dork_results": [], "scrape_results": {}, "hvt_records": [], "pivot_chain": [arg], "pivot_log": [], "discovered_assets": [], "scan_meta": {}} try: try: loop = asyncio.get_running_loop() except RuntimeError: loop = None if loop and loop.is_running(): import concurrent.futures with concurrent.futures.ThreadPoolExecutor(max_workers=1) as ex: result = ex.submit(asyncio.run, self.orc.fullscan(arg, pivot=not self.config.no_pivot)).result(timeout=600) else: result = asyncio.run(self.orc.fullscan(arg, pivot=not self.config.no_pivot)) finally: self._last = result.get("records", []) self._last_full = result self.session_state["targets_scanned"].append(arg) for node in result.get("pivot_chain", [arg]): if node not in self.session_state["pivot_chain"]: self.session_state["pivot_chain"].append(node) scan_meta = result.get("scan_meta", {}) or {} elapsed = scan_meta.get("elapsed_seconds") depth = scan_meta.get("pivot_depth", 0) nodes = scan_meta.get("nodes_discovered", 0) analysis = result.get("analysis") or CredAnalyzer.analyze(self._last) rs_total = analysis.get("risk_score", 0) sev = analysis.get("severity", {}) col = C.R if rs_total > 60 else C.Y if rs_total > 30 else C.G badge = (f"{C.R}[CRITICAL]{C.X}" if rs_total > 60 or sev.get("critical", 0) > 0 else f"{C.Y}[HIGH]{C.X}" if rs_total > 30 or sev.get("high", 0) > 0 else f"{C.G}[MEDIUM]{C.X}") W = 62 print(f"\n {C.G}{'━'*W}{C.X}") print(f" {C.G} AUTOSCAN COMPLETE{C.X} {badge} {C.DM}target: {arg}{C.X}") print(f" {C.G}{'━'*W}{C.X}") # ── Summary stats ───────────────────────────────────────────── dork_count = len(result.get("dork_results", []) or []) scrape_r = result.get("scrape_results", {}) or {} paste_count = len(scrape_r.get("pastes", [])) cred_sc_cnt = len(scrape_r.get("credentials", [])) tg_count = len(scrape_r.get("telegram", [])) mc_count = len(scrape_r.get("dork_misconfigs", [])) print(f"\n {'Records':<26} {analysis.get('total_records', len(self._last or []))}" f" {C.DM}({analysis.get('unique_records', 0)} unique){C.X}") print(f" {'Passwords Exposed':<26} {C.R}{analysis.get('passwords_found', 0)}{C.X}") print(f" {'Stealer Logs':<26} {C.R}{analysis.get('stealer_logs', 0)}{C.X}") print(f" {'High-Value Targets':<26} {C.O}{analysis.get('hvt_count', 0)}{C.X}") print(f" {'Dork Hits':<26} {C.O}{dork_count}{C.X}") print(f" {'Pastes Found':<26} {C.P}{paste_count}{C.X}") if cred_sc_cnt: print(f" {'Scraped Credentials':<26} {C.R}{cred_sc_cnt}{C.X}") if tg_count: print(f" {'Telegram Hits':<26} {C.CY}{tg_count}{C.X}") if mc_count: print(f" {'Misconfigurations':<26} {C.O}{mc_count}{C.X}") print(f" {'Nodes Discovered':<26} {nodes}") print(f" {'Pivot Depth':<26} {depth}") if elapsed is not None: print(f" {'Elapsed':<26} {elapsed:.1f}s") da_cnt = len(result.get("discovered_assets", []) or []) if da_cnt: print(f" {'Reinjected Assets':<26} {C.CY}{da_cnt}{C.X}") print(f" {'Risk Score':<26} {col}{rs_total}/100{C.X}") print(f" {'Severity':<26} " f"{C.R}{sev.get('critical',0)} CRIT{C.X} " f"{C.Y}{sev.get('high',0)} HIGH{C.X} " f"{sev.get('medium',0)} MED") # ── High-Value Targets ──────────────────────────────────────── hvt = result.get("hvt_records", []) if hvt: print(f"\n {C.O}{'─'*W}{C.X}") print(f" {C.O}⚑ HIGH-VALUE TARGETS ({len(hvt)}){C.X}") print(f" {C.O}{'─'*W}{C.X}") for r in hvt[:10]: ident = _rec_get(r, "email") or _rec_get(r, "username") or "—" rs = _rec_get(r, "risk_score") print(f" {C.R}→{C.X} {C.W}{ident:<45}{C.X} {C.Y}risk: {rs}{C.X}") extra = self._record_assets(r) if extra: print(f" {extra}") if len(hvt) > 10: print(f" {C.DM} … and {len(hvt)-10} more — use 'graph' or 'export' for the full list{C.X}") # ── Discovered Assets (flat provenance table) ───────────────── # ── Pivot Tree ──────────────────────────────────────────────── pivot_log = result.get("pivot_log", []) discovered_assets = result.get("discovered_assets", []) if pivot_log: print(f"\n {C.CY}{'─'*W}{C.X}") print(f" {C.CY} PIVOT TREE ({len(pivot_log)} nodes){C.X}") print(f" {C.CY}{'─'*W}{C.X}") self._print_pivot_tree(pivot_log, result) else: # No avalanche engine — flat display recs = self._last or [] cred_recs = [r for r in recs if _rec_get(r, "password")] other_recs = [r for r in recs if not _rec_get(r, "password")] if cred_recs: print(f"\n {C.R}{'─'*W}{C.X}") print(f" {C.R}[!] EXPOSED CREDENTIALS ({len(cred_recs)}){C.X}") print(f" {C.R}{'─'*W}{C.X}") for r in cred_recs[:12]: em = (_rec_get(r, "email") or _rec_get(r, "username") or "—")[:40] pw = _rec_get(r, "password") or "" src = _rec_get(r, "source") or "" rs_r = _rec_get(r, "risk_score") or 0 masked = pw[:2] + "●" * min(len(pw) - 2, 8) if len(pw) > 2 else "●●●●" rc = C.R if float(rs_r) >= 70 else C.Y if float(rs_r) >= 40 else C.W print(f" {C.R}→{C.X} {C.CY}{em:<40}{C.X} {rc}{masked}{C.X} {C.DM}[{src[:18]}] risk:{rs_r}{C.X}") extra = self._record_assets(r) if extra: print(f" {extra}") if len(cred_recs) > 12: print(f" {C.DM} … and {len(cred_recs)-12} more — use 'export'{C.X}") if other_recs: print(f"\n {C.B}{'─'*W}{C.X}") print(f" {C.B}[~] DISCOVERED ASSETS ({len(other_recs)}){C.X}") print(f" {C.B}{'─'*W}{C.X}") for r in other_recs[:12]: ident = _rec_get(r, "email") or _rec_get(r, "username") or "—" src = _rec_get(r, "source") or "" rs_r = _rec_get(r, "risk_score") or 0 print(f" {C.B}→{C.X} {C.CY}{ident:<40}{C.X} {C.DM}[{src[:18]}] risk:{rs_r}{C.X}") extra = self._record_assets(r) if extra: print(f" {extra}") if len(other_recs) > 12: print(f" {C.DM} … and {len(other_recs)-12} more — use 'export'{C.X}") # ── Flat discovered assets table ────────────────────────────── if discovered_assets: _phase_col = {"breach": C.R, "dork": C.O, "scrape": C.P, "hash_crack": C.P, "seed": C.G} print(f"\n {C.B}{'─'*W}{C.X}") print(f" {C.B} DISCOVERED ASSETS ({len(discovered_assets)} new identifiers){C.X}") print(f" {C.B}{'─'*W}{C.X}") print(f" {C.DM} {'ASSET':<38} {'TYPE':<10} {'PHASE':<10} {'FOUND IN / REF'}{C.X}") print(f" {C.DM} {'─'*38} {'─'*10} {'─'*10} {'─'*30}{C.X}") for da in discovered_assets[:50]: pc = _phase_col.get(da["phase"], C.DM) ref = da.get("ref", "")[:55] print(f" {C.CY} {da['asset']:<38}{C.X} {C.DM}{da['qtype']:<10}{C.X} " f"{pc}{da['phase']:<10}{C.X} {C.DM}{ref}{C.X}") if len(discovered_assets) > 50: print(f" {C.DM} … and {len(discovered_assets)-50} more — use 'export' for full list{C.X}") print(f"\n {C.G}{'━'*W}{C.X}") print(f" {C.DM}Use 'graph' for full intelligence report | 'export pdf/html/json' for forensic output{C.X}\n") def _print_pivot_tree(self, pivot_log: list, result: dict) -> None: """Print the full pivot tree with per-node phase findings and reinjection details.""" log_by_key = {e["asset"].lower(): e for e in pivot_log} # Index breach records by the scanned asset (matched by email/username/phone/domain) all_recs = result.get("records", []) or [] recs_by_asset: Dict[str, list] = {} for r in all_recs: # A record belongs to the asset whose value matches the record's identity fields for fname in ("email", "username", "phone", "domain", "ip_address"): v = _rec_get(r, fname) if v: recs_by_asset.setdefault(v.lower(), []).append(r) break # one record → one bucket # Index dork/scrape hits by pivot_asset tag dork_by_asset: Dict[str, list] = {} for h in result.get("dork_results", []) or []: dork_by_asset.setdefault(h.get("pivot_asset", "").lower(), []).append(h) scrape_by_asset: Dict[str, list] = {} for cat in ("credentials", "pastes", "telegram", "dork_misconfigs"): for item in (result.get("scrape_results", {}) or {}).get(cat, []): if isinstance(item, dict): scrape_by_asset.setdefault( item.get("pivot_asset", "").lower(), [] ).append((cat, item)) phase_colors = { "seed": C.G, "breach": C.R, "dork": C.O, "scrape": C.P, "hash_crack": C.P, "pivot": C.CY, } def _print_node(entry: dict, prefix: str, is_last: bool) -> None: asset = entry["asset"] qtype = entry["qtype"] found_in = entry.get("found_in", entry.get("source", "?")) n_rec = entry["records"] n_dork = entry["dorks"] n_sc = entry["scrape"] cracked = entry.get("cracked") or [] children = entry.get("children", []) # list of dicts: {asset,qtype,found_in,ref} conn = "└─" if is_last else "├─" fc = phase_colors.get(found_in, C.DM) tag = f"{fc}[{found_in.upper()}]{C.X}" hvt_flag = "" # Check if this asset appears in HVT records for r in (result.get("hvt_records", []) or []): if ((_rec_get(r, "email") or _rec_get(r, "username") or "") == asset): hvt_flag = f" {C.O}⚑HVT{C.X}" break print(f" {prefix}{C.DM}{conn}{C.X} {tag} {C.W}{asset}{C.X} {C.DM}({qtype}){C.X}{hvt_flag}") cp = prefix + (" " if is_last else "│ ") # Stats stats = [] if n_rec: stats.append(f"{C.R}{n_rec} breach{C.X}") if n_dork: stats.append(f"{C.O}{n_dork} dork{C.X}") if n_sc: stats.append(f"{C.P}{n_sc} scrape{C.X}") if cracked: stats.append(f"{C.P}cracked→{', '.join(cracked[:2])}{C.X}") if stats: print(f" {cp} {C.DM}results:{C.X} {' | '.join(stats)}") # Breach records for this asset key = asset.lower() asset_recs = recs_by_asset.get(key, []) cred_recs = [r for r in asset_recs if _rec_get(r, "password")] other_recs = [r for r in asset_recs if not _rec_get(r, "password")] for r in cred_recs[:4]: em = (_rec_get(r, "email") or _rec_get(r, "username") or "—")[:32] pw = _rec_get(r, "password") or "" src = _rec_get(r, "source") or "" rs_r = float(_rec_get(r, "risk_score") or 0) masked = pw[:2] + "●" * min(len(pw)-2, 6) if len(pw) > 2 else "●●●●" rc = C.R if rs_r >= 70 else C.Y if rs_r >= 40 else C.W extra = self._record_assets(r) print(f" {cp} {C.R}breach{C.X} {C.CY}{em}{C.X} {rc}{masked}{C.X} " f"{C.DM}[{src[:20]}] risk:{rs_r:.0f}{C.X}") if extra: print(f" {cp} {extra}") if len(cred_recs) > 4: print(f" {cp} {C.DM}… +{len(cred_recs)-4} more credentials{C.X}") for r in other_recs[:2]: ident = _rec_get(r, "email") or _rec_get(r, "username") or "—" extra = self._record_assets(r) src = _rec_get(r, "source") or "" print(f" {cp} {C.B}asset{C.X} {C.CY}{ident}{C.X} {C.DM}[{src[:20]}]{C.X}") if extra: print(f" {cp} {extra}") if len(other_recs) > 2: print(f" {cp} {C.DM}… +{len(other_recs)-2} more assets{C.X}") # Dork hits for this asset for h in dork_by_asset.get(key, [])[:3]: url = h.get("url", "")[:70] dork = h.get("dork", "")[:60] print(f" {cp} {C.O}dork{C.X} {C.DM}{url or dork}{C.X}") if url and dork: print(f" {cp} {C.DM}query: {dork[:60]}{C.X}") if len(dork_by_asset.get(key, [])) > 3: print(f" {cp} {C.DM}… +{len(dork_by_asset[key])-3} more dork hits{C.X}") # Scrape items for this asset for cat, item in scrape_by_asset.get(key, [])[:3]: if cat == "credentials": print(f" {cp} {C.R}cred{C.X} {item.get('raw','')[:65]}") elif cat == "telegram": print(f" {cp} {C.CY}tg{C.X} [{item.get('channel','')}] {item.get('text','')[:55]}") elif cat == "pastes": pats = ", ".join(f"{k}({len(v)})" for k,v in (item.get("patterns") or {}).items()) print(f" {cp} {C.P}paste{C.X} [{item.get('site','')}] {item.get('id','')[:30]} {C.DM}{pats}{C.X}") elif cat == "dork_misconfigs": print(f" {cp} {C.O}misc{C.X} {item.get('url', item.get('title',''))[:65]}") if len(scrape_by_asset.get(key, [])) > 3: print(f" {cp} {C.DM}… +{len(scrape_by_asset[key])-3} more scrape items{C.X}") # Children — show what was discovered and from which phase if children: print(f" {cp} {C.DM}↳ reinjected {len(children)} new asset(s):{C.X}") for ch in children[:8]: ch_asset = ch.get("asset", "") ch_qt = ch.get("qtype", "") ch_phase = ch.get("found_in", "?") ch_ref = ch.get("ref", "")[:55] ch_color = phase_colors.get(ch_phase, C.DM) # Show whether this child was itself processed (has a log entry) processed = "✓" if ch_asset.lower() in log_by_key else "…" print(f" {cp} {processed} {ch_color}[{ch_phase}]{C.X} " f"{C.CY}{ch_asset}{C.X} {C.DM}({ch_qt}) ref: {ch_ref}{C.X}") if len(children) > 8: print(f" {cp} {C.DM}… +{len(children)-8} more{C.X}") # Recurse into child log entries child_log_entries = [log_by_key[ch["asset"].lower()] for ch in children if ch.get("asset","").lower() in log_by_key] for i, child_entry in enumerate(child_log_entries): _print_node(child_entry, cp, is_last=(i == len(child_log_entries)-1)) roots = [e for e in pivot_log if e["depth"] == 0] for i, root in enumerate(roots): _print_node(root, "", is_last=(i == len(roots)-1)) def _dork(self, arg: str) -> None: if not arg: out("warn","No target specified."); return results = self.orc.dork(arg) prev = self._last_full or {} self._last_full = { "target": arg if not prev.get("target") else prev["target"], "records": prev.get("records", self._last or []), "analysis": prev.get("analysis", {}), "hvt_records": prev.get("hvt_records", []), "dork_results": results, "scrape_results": prev.get("scrape_results", {}), "pivot_chain": prev.get("pivot_chain", [arg]), "pivot_log": prev.get("pivot_log", []), "discovered_assets": prev.get("discovered_assets", []), "scan_meta": prev.get("scan_meta", {}), } if not self._last: self._last = self._last_full["records"] W = 62 print(f"\n {C.O}{'━'*W}{C.X}") print(f" {C.O} DORK RESULTS{C.X} {C.DM}target: {arg}{C.X}") print(f" {C.O}{'━'*W}{C.X}") if not results: print(f"\n {C.DM} No results found.{C.X}") else: # Group by engine by_engine: Dict[str, list] = {} for r in results: eng = r.get("engine", "Unknown") by_engine.setdefault(eng, []).append(r) print(f"\n {C.W}Total hits: {C.O}{len(results)}{C.X} " f"{C.DM}engines: {', '.join(f'{e}({len(v)})' for e, v in by_engine.items())}{C.X}\n") for i, r in enumerate(results[:20], 1): title = (r.get("title") or r.get("dork") or "")[:65] url = r.get("url", "") snippet = r.get("snippet", "")[:110] engine = r.get("engine", "") dork_q = r.get("dork", "")[:60] eng_tag = f" {C.DM}[{engine}]{C.X}" if engine else "" print(f" {C.O}{i:2}.{C.X} {C.W}{title}{C.X}{eng_tag}") if url: print(f" {C.CY}{url[:80]}{C.X}") if snippet: print(f" {C.DM}{snippet}{C.X}") if dork_q and dork_q != title: print(f" {C.DM}dork: {dork_q}{C.X}") print() if len(results) > 20: print(f" {C.DM} … and {len(results)-20} more — use 'export' for the full list{C.X}") print(f" {C.O}{'━'*W}{C.X}") print(f" {C.DM}Use 'export html/pdf/json' to save the full dork report.{C.X}\n") def _scrape(self, arg: str) -> None: if not arg: out("warn","No target specified."); return results = self.orc.scrape(arg) prev = self._last_full or {} self._last_full = { "target": arg if not prev.get("target") else prev["target"], "records": prev.get("records", self._last or []), "analysis": prev.get("analysis", {}), "hvt_records": prev.get("hvt_records", []), "dork_results": prev.get("dork_results", []), "scrape_results": results, "pivot_chain": prev.get("pivot_chain", [arg]), "pivot_log": prev.get("pivot_log", []), "discovered_assets": prev.get("discovered_assets", []), "scan_meta": prev.get("scan_meta", {}), } if not self._last: self._last = self._last_full["records"] pastes = results.get("pastes", []) creds = results.get("credentials", []) hashes = results.get("hashes", []) tg = results.get("telegram", []) mc = results.get("dork_misconfigs", []) total = len(pastes) + len(creds) + len(tg) + len(mc) W = 62 print(f"\n {C.P}{'━'*W}{C.X}") print(f" {C.P} SCRAPE RESULTS{C.X} {C.DM}target: {arg}{C.X}") print(f" {C.P}{'━'*W}{C.X}") # ── Summary row ─────────────────────────────────────────────── print(f"\n {'Pastes':<20} {C.P}{len(pastes)}{C.X}") print(f" {'Credentials':<20} {C.R}{len(creds)}{C.X}") print(f" {'Hashes':<20} {C.Y}{len(hashes)}{C.X}") print(f" {'Telegram Hits':<20} {C.CY}{len(tg)}{C.X}") print(f" {'Misconfigurations':<20} {C.O}{len(mc)}{C.X}") # ── Pastes ──────────────────────────────────────────────────── _paste_url_tmpl = { "Pastebin": "https://pastebin.com/{}", "Rentry": "https://rentry.co/{}", "Hastebin": "https://hastebin.com/{}", "DPaste": "https://dpaste.org/{}", "Ghostbin": "https://ghostbin.com/paste/{}", "JustPaste": "https://justpaste.it/{}", "ControlC": "https://controlc.com/{}", "Paste2": "https://paste2.org/raw/{}", } if pastes: print(f"\n {C.P}┌─ PASTES ({len(pastes)}) {'─'*(W-14)}┐{C.X}") for p in pastes[:10]: site = p.get("site", "") pid = p.get("id", "") title = (p.get("title") or pid)[:45] pats = ", ".join(f"{k}({len(v)})" for k, v in (p.get("patterns") or {}).items()) tmpl = _paste_url_tmpl.get(site, "") url = tmpl.format(pid) if tmpl and pid else "" pat_tag = f" {C.DM}{pats}{C.X}" if pats else "" print(f" {C.P}│{C.X} {C.DM}[{site}]{C.X} {title}{pat_tag}") if url: print(f" {C.P}│{C.X} {C.CY} {url}{C.X}") if len(pastes) > 10: print(f" {C.P}│{C.X} {C.DM}… and {len(pastes)-10} more{C.X}") print(f" {C.P}└{'─'*(W-2)}┘{C.X}") # ── Extracted credentials ───────────────────────────────────── if creds: print(f"\n {C.R}┌─ EXTRACTED CREDENTIALS ({len(creds)}) {'─'*(W-26)}┐{C.X}") for c in creds[:12]: raw = c.get("raw", "")[:75] src = c.get("source", "") src_tag = f" {C.DM}[{src}]{C.X}" if src else "" print(f" {C.R}│{C.X} {C.R}{raw}{C.X}{src_tag}") if len(creds) > 12: print(f" {C.R}│{C.X} {C.DM}… and {len(creds)-12} more — use 'export' for the full list{C.X}") print(f" {C.R}└{'─'*(W-2)}┘{C.X}") # ── Telegram CTI ────────────────────────────────────────────── if tg: print(f"\n {C.CY}┌─ TELEGRAM CTI ({len(tg)}) {'─'*(W-18)}┐{C.X}") for t in tg[:6]: ch = t.get("channel", "") text = t.get("text", "")[:65] pats = ", ".join(f"{k}({len(v)})" for k, v in (t.get("patterns") or {}).items()) pat_tag = f" {C.DM}{pats}{C.X}" if pats else "" print(f" {C.CY}│{C.X} {C.DM}[{ch}]{C.X} {text}{pat_tag}") if len(tg) > 6: print(f" {C.CY}│{C.X} {C.DM}… and {len(tg)-6} more{C.X}") print(f" {C.CY}└{'─'*(W-2)}┘{C.X}") # ── Misconfigurations ───────────────────────────────────────── if mc: print(f"\n {C.O}┌─ MISCONFIGURATIONS ({len(mc)}) {'─'*(W-22)}┐{C.X}") for m in mc[:6]: title = m.get("title", "")[:55] url = m.get("url", "")[:70] dork = m.get("dork", "")[:55] print(f" {C.O}│{C.X} {C.W}{title}{C.X}") if url: print(f" {C.O}│{C.X} {C.DM}{url}{C.X}") if dork and dork != title: print(f" {C.O}│{C.X} {C.DM}dork: {dork}{C.X}") if len(mc) > 6: print(f" {C.O}│{C.X} {C.DM}… and {len(mc)-6} more{C.X}") print(f" {C.O}└{'─'*(W-2)}┘{C.X}") if total == 0: print(f"\n {C.DM} No results found.{C.X}") print(f"\n {C.P}{'━'*W}{C.X}") print(f" {C.DM}Use 'export html/pdf/json' to save the full scrape report.{C.X}\n") def _crack(self, arg: str) -> None: if not arg: out("warn","No hash specified."); return out("info", f" Cracking: {arg}") result = self.orc.crack(arg) out("info", f" Possible types: {', '.join(t[0] for t in result.get('types',[]))}") if result.get("plaintext"): out("ok", f" ✓ CRACKED: {result['plaintext']}") out("info", f" Method: {result['method']}") else: out("warn", " Could not crack this hash with available methods.") def _analyze(self, arg: str) -> None: if not arg: out("warn","No password specified."); return r = self.orc.analyze_pass(arg) print(f"\n {C.G}Password Analysis{C.W}\n {'─'*40}") print(f" Password: {C.Y}{r['password']}{C.W}") print(f" Length: {r['length']}") print(f" Charsets: {', '.join(r['charsets'])}") print(f" Entropy: {r['entropy']} bits") print(f" Score: {r['score']}/100 ({r['strength']})") if r["patterns"]: print(f"\n {C.R}Patterns Detected:{C.W}") for p in r["patterns"]: print(f" ⚠ {p}") print(f"\n {C.G}Crack Time Estimates:{C.W}") for label, time_str in r["crack_times"].items(): print(f" {label:<30} {time_str}") def _sources(self) -> None: """ --list-sources / REPL 'sources': debug/operator view. Shows every plugin with input_type, confidence, key status, and load errors. """ # Ensure orchestrator and source orchestrator are initialised if self.orc._source_orchestrator is None: self.orc._source_orchestrator = SourceOrchestrator( asyncio.Semaphore(self.orc.config.concurrency), self.db, self.orc.config ) # Scan sources dir directly to count total JSON files (including failed ones) json_files = list(SOURCE_DIR.glob("*.json")) total_files = len(json_files) # Track load failures by attempting to parse each file failed: List[str] = [] for jf in json_files: try: json.loads(jf.read_text(encoding="utf-8")) except Exception as exc: failed.append(f"{jf.name}: {exc}") self.orc._source_orchestrator._ensure_loaded() all_sources = ( self.orc._source_orchestrator._nox_sources + self.orc._source_orchestrator._fs_providers + self.orc._source_orchestrator._py_providers ) loaded = len(all_sources) skipped = total_files - loaded # files that parsed but produced no source (e.g. key missing) W = 62 print(f"\n {C.G}{'━'*W}{C.X}") print(f" {C.G} PLUGIN DEBUG — LOADED SOURCES{C.X}") print(f" {C.G}{'━'*W}{C.X}") print(f"\n {C.W}Total JSON files in sources/:{C.X} {total_files}") print(f" {C.G}Loaded:{C.X} {loaded}") if skipped: print(f" {C.Y}Skipped (key missing/invalid):{C.X} {skipped}") if failed: print(f" {C.R}Parse errors:{C.X} {len(failed)}") print() if not all_sources: out("err", "No plugins loaded. Run: python build_sources.py") return # Column header print(f" {C.DM}{'#':>3} {'NAME':<28} {'INPUT':<10} {'CONF':>5} {'KEY STATUS'}{C.X}") print(f" {C.DM}{'─'*3} {'─'*28} {'─'*10} {'─'*5} {'─'*30}{C.X}") for i, src in enumerate(all_sources, 1): defn = getattr(src, "_def", {}) or {} name = src.name input_type = defn.get("input_type", "any") conf = defn.get("confidence", "") conf_str = f"{conf:.2f}" if isinstance(conf, float) else (str(conf) if conf else " — ") # Key status slots = defn.get("api_key_slots", []) key_name = (defn.get("required_api_key_name", "") or (slots[0].strip("{}") if slots else "")) needs_key = getattr(src, "needs_key", bool(key_name)) if not needs_key: key_col = f"{C.G}public (no key){C.X}" else: api_key = getattr(src, "_api_key", "") or "" if api_key: masked = f"****{api_key[-4:]}" if len(api_key) >= 4 else "****" key_col = f"{C.G}configured ({masked}){C.X}" else: key_col = f"{C.R}NOT configured [{key_name}]{C.X}" # Colour name by key status name_col = (C.G if (not needs_key or api_key) else C.Y) + f"{name:<28}" + C.X print(f" {C.DM}{i:>3}.{C.X} {name_col} {C.DM}{input_type:<10}{C.X} {C.CY}{conf_str:>5}{C.X} {key_col}") # Parse errors detail if failed: print(f"\n {C.R}Parse errors:{C.X}") for err in failed: print(f" {C.R}✗{C.X} {err}") print(f"\n {C.DM}Tip: set keys directly in ~/.config/nox-cli/apikeys.json (chmod 0600).{C.X}") print(f" {C.G}{'━'*W}{C.X}\n") def _export(self, arg: str) -> None: if not self._last and self._last_full: self._last = self._last_full.get("records", []) # Allow export even with no breach records if dork/scrape results exist full = self._last_full or {} has_dork = bool(full.get("dork_results")) has_scrape = bool(full.get("scrape_results")) if not self._last and not has_dork and not has_scrape: out("warn", " No results to export. Run a scan, dork, or scrape first."); return parts = arg.split() if arg else [] fmt = None remaining = [] i = 0 while i < len(parts): if parts[i] == "--format" and i + 1 < len(parts): fmt = parts[i + 1]; i += 2 elif parts[i].startswith("--format="): fmt = parts[i].split("=", 1)[1]; i += 1 else: remaining.append(parts[i]); i += 1 _known = {"json", "csv", "html", "md", "pdf"} if fmt is None and remaining and remaining[0].lower() in _known: fmt = remaining.pop(0) fmt = (fmt or "html").lower() path = remaining[0] if remaining else f"nox_report_{int(time.time())}.{fmt}" data = full if isinstance(full, dict) and ("records" in full or has_dork or has_scrape) \ else {"target": "unknown", "records": self._last} # Ensure records key always present if "records" not in data: data = dict(data); data["records"] = self._last inv = self.session_state.get("investigator_id", "NOX-AUTO") if fmt == "json": Reporter.to_json(data, path) elif fmt == "csv": resolved = Reporter._resolve_path(path, "csv") Reporter.to_csv(self._last, resolved) self._export_csv_extras(data, resolved) elif fmt == "html": Reporter.to_html(data, path) elif fmt == "md": Reporter.to_markdown(data, path) elif fmt == "pdf": Reporter.to_pdf(data, path, investigator_id=inv) else: out("warn", f" Unknown format: {fmt}. Use json/csv/html/md/pdf") @staticmethod def _export_csv_extras(data: dict, base_path: str) -> None: """Write dork and scrape results as companion CSV files alongside the main breach CSV.""" import csv as _csv base = base_path.rsplit(".", 1)[0] dork_results = data.get("dork_results", []) or [] if dork_results: dork_path = f"{base}_dorks.csv" with open(dork_path, "w", newline="", encoding="utf-8") as f: w = _csv.DictWriter(f, fieldnames=["url", "title", "snippet", "dork", "engine"], extrasaction="ignore") w.writeheader() w.writerows(dork_results) out("ok", f"Dork results CSV saved: {dork_path}") scrape = data.get("scrape_results", {}) or {} pastes = scrape.get("pastes", []) creds = scrape.get("credentials", []) tg = scrape.get("telegram", []) mc = scrape.get("dork_misconfigs", []) if pastes: p_path = f"{base}_pastes.csv" with open(p_path, "w", newline="", encoding="utf-8") as f: w = _csv.DictWriter(f, fieldnames=["site", "id", "title", "query"], extrasaction="ignore") w.writeheader() w.writerows(pastes) out("ok", f"Pastes CSV saved: {p_path}") if creds: c_path = f"{base}_scraped_creds.csv" with open(c_path, "w", newline="", encoding="utf-8") as f: w = _csv.DictWriter(f, fieldnames=["raw", "source", "paste_id"], extrasaction="ignore") w.writeheader() w.writerows(creds) out("ok", f"Scraped credentials CSV saved: {c_path}") if tg: t_path = f"{base}_telegram.csv" with open(t_path, "w", newline="", encoding="utf-8") as f: w = _csv.DictWriter(f, fieldnames=["channel", "text"], extrasaction="ignore") w.writeheader() w.writerows(tg) out("ok", f"Telegram hits CSV saved: {t_path}") if mc: m_path = f"{base}_misconfigs.csv" with open(m_path, "w", newline="", encoding="utf-8") as f: w = _csv.DictWriter(f, fieldnames=["url", "title", "dork"], extrasaction="ignore") w.writeheader() w.writerows(mc) out("ok", f"Misconfigurations CSV saved: {m_path}") discovered_assets = data.get("discovered_assets", []) or [] if discovered_assets: da_path = f"{base}_discovered_assets.csv" with open(da_path, "w", newline="", encoding="utf-8") as f: w = _csv.DictWriter(f, fieldnames=["asset", "qtype", "phase", "ref", "parent", "depth"], extrasaction="ignore") w.writeheader() w.writerows(discovered_assets) out("ok", f"Discovered assets CSV saved: {da_path}") def _config(self, arg: str) -> None: parts = arg.split(None, 1) if arg else [] if len(parts) < 2: out("info", " Config: threads, timeout, tor, proxy") out("dim", " Usage: config "); return k, v = parts try: if k == "threads": self.config.max_threads = self.config.concurrency = int(v) elif k == "timeout": self.config.timeout = int(v) elif k == "tor": self.config.use_tor = v.lower() in ("true","1","yes","on") if self.config.use_tor: self.config.proxy = f"socks5h://127.0.0.1:{self.config.tor_socks}" self._refresh_session() elif k == "proxy": self.config.proxy = v if v != "none" else None self._refresh_session() else: out("warn", f" Unknown config key: {k}"); return except ValueError: out("err", f" Invalid value for {k}: {v!r}"); return out("ok", f" {k} = {v}") def _tor(self) -> None: self.config.use_tor = not self.config.use_tor status = "ENABLED" if self.config.use_tor else "DISABLED" out("ok" if self.config.use_tor else "warn", f" Tor routing: {status}") if self.config.use_tor: self.config.proxy = f"socks5h://127.0.0.1:{self.config.tor_socks}" else: self.config.proxy = None self._refresh_session() def _proxy(self, arg: str) -> None: if not arg: out("info", f" Current proxy: {self.config.proxy or 'None'}") out("dim", " Usage: proxy | proxy none"); return self.config.proxy = None if arg.lower() == "none" else arg out("ok", f" Proxy {'disabled' if not self.config.proxy else f'set: {arg}'}") self._refresh_session() def _refresh_session(self) -> None: self.orc.session = Session(self.config) self.orc.dork_engine.s = self.orc.session self.orc.scrape_engine.s = self.orc.session self.orc.hash_engine._session = self.orc.session self.orc.dorking_engine = DorkingEngine(self.config.concurrency, self.orc.db, self.config) # ── Investigation Dashboard ──────────────────────────────────────────── @staticmethod def _risk_badge(analysis: dict) -> str: rs = analysis.get("risk_score", 0) if analysis else 0 sev = analysis.get("severity", {}) if analysis else {} if rs > 60 or sev.get("critical", 0) > 0: return f"{C.R}[CRITICAL]{C.W}" if rs > 30 or sev.get("high", 0) > 0: return f"{C.Y}[HIGH]{C.W}" return f"{C.G}[MEDIUM]{C.W}" def _graph(self) -> None: """Mini forensic report — printed after autoscan or on demand.""" if not self._last and self._last_full: self._last = self._last_full.get("records", []) full = self._last_full or {} if not full.get("target"): out("warn", "No results loaded. Run a scan, dork, or scrape first."); return if self._last is None: self._last = [] full = self._last_full or {} target = full.get("target", "unknown") analysis = full.get("analysis") or {} badge = self._risk_badge(analysis) W = 62 print(f"\n {C.G}{'━'*W}{C.X}") print(f" {C.G} NOX INTELLIGENCE REPORT{C.X} {badge}") print(f" {C.G}{'━'*W}{C.X}") ts = full.get("timestamp") or "" print(f" Target : {C.BD}{target}{C.X}") if ts: print(f" Timestamp: {C.DM}{ts}{C.X}") rs = analysis.get("risk_score", 0) sev = analysis.get("severity", {}) col = C.R if rs > 60 else C.Y if rs > 30 else C.G print(f"\n {C.Y}[ EXECUTIVE SUMMARY ]{C.X}") scan_meta = full.get("scan_meta", {}) or {} pivot_depth = scan_meta.get("pivot_depth", 0) nodes = scan_meta.get("nodes_discovered", 0) elapsed = scan_meta.get("elapsed_seconds") dork_count = len(full.get("dork_results", []) or []) scrape_r = full.get("scrape_results", {}) or {} paste_cnt = len(scrape_r.get("pastes", [])) cred_sc_cnt = len(scrape_r.get("credentials", [])) tg_cnt = len(scrape_r.get("telegram", [])) mc_cnt = len(scrape_r.get("dork_misconfigs", [])) print(f" Records : {analysis.get('total_records', len(self._last or []))}" f" {C.DM}({analysis.get('unique_records',0)} unique){C.X}") print(f" Unique Emails : {analysis.get('unique_emails', 0)}") print(f" Passwords Found : {C.R}{analysis.get('passwords_found', 0)}{C.X}") print(f" Stealer Logs : {C.R}{analysis.get('stealer_logs', 0)}{C.X}") print(f" HVT Accounts : {C.O}{analysis.get('hvt_count', 0)}{C.X}") if dork_count: print(f" Dork Hits : {C.O}{dork_count}{C.X}") if paste_cnt: print(f" Pastes : {C.P}{paste_cnt}{C.X}") if cred_sc_cnt: print(f" Scraped Creds : {C.R}{cred_sc_cnt}{C.X}") if tg_cnt: print(f" Telegram Hits : {C.CY}{tg_cnt}{C.X}") if mc_cnt: print(f" Misconfigs : {C.O}{mc_cnt}{C.X}") if nodes: print(f" Nodes Discovered : {nodes}") if pivot_depth: print(f" Pivot Depth : {pivot_depth}") if elapsed is not None: print(f" Scan Duration : {elapsed:.1f}s") da_cnt = len(full.get("discovered_assets", []) or []) if da_cnt: print(f" Reinjected Assets: {C.CY}{da_cnt}{C.X}") print(f" Risk Score : {col}{rs}/100{C.X}") print(f" Severity : {C.R}{sev.get('critical',0)} CRIT{C.X} " f"{C.Y}{sev.get('high',0)} HIGH{C.X} {sev.get('medium',0)} MED") # Pivot chain — prefer the one from the fullscan result (avalanche order) pivot_log = full.get("pivot_log", []) chain = full.get("pivot_chain") or self.session_state.get("pivot_chain", []) if pivot_log: print(f"\n {C.Y}[ PIVOT TREE ({len(pivot_log)} nodes) ]{C.X}") self._print_pivot_tree(pivot_log, full) # Show discovered assets after pivot tree discovered_assets = full.get("discovered_assets", []) or [] if discovered_assets: _phase_col = {"breach": C.R, "dork": C.O, "scrape": C.P, "hash_crack": C.P, "seed": C.G} print(f"\n {C.Y}[ DISCOVERED ASSETS ({len(discovered_assets)} new identifiers) ]{C.X}") print(f" {C.DM} {'ASSET':<38} {'TYPE':<10} {'PHASE':<10} REFERENCE{C.X}") for da in discovered_assets[:30]: pc = _phase_col.get(da["phase"], C.DM) ref = da.get("ref", "")[:55] print(f" {C.CY} {da['asset']:<38}{C.X} {C.DM}{da['qtype']:<10}{C.X} " f"{pc}{da['phase']:<10}{C.X} {C.DM}{ref}{C.X}") if len(discovered_assets) > 30: print(f" {C.DM} … and {len(discovered_assets)-30} more — use 'export'{C.X}") else: # No pivot log — flat display if len(chain) > 1: print(f"\n {C.Y}[ PIVOT CHAIN ({len(chain)} nodes) ]{C.X}") for i, node in enumerate(chain[:20]): pfx = " " if i == 0 else " ↳ " print(f" {C.DM}{pfx}{C.X}{C.CY}{node}{C.X}") if len(chain) > 20: print(f" {C.DM} … and {len(chain)-20} more nodes{C.X}") hvt = full.get("hvt_records", []) if hvt: print(f"\n {C.Y}[ HIGH-VALUE TARGETS ]{C.X}") for r in hvt[:8]: ident = _rec_get(r, "email") or _rec_get(r, "username") or "—" rs_r = _rec_get(r, "risk_score") or "" rs_tag = f" {C.Y}risk:{rs_r}{C.X}" if rs_r else "" print(f" {C.R}⚑{C.X} {ident}{rs_tag}") creds = [(r, _rec_get(r, "password")) for r in self._last if _rec_get(r, "password")] other_assets = [r for r in self._last if not _rec_get(r, "password") and (_rec_get(r, "email") or _rec_get(r, "username") or _rec_get(r, "ip_address") or _rec_get(r, "phone"))] if creds: print(f"\n {C.Y}[ EXPOSED CREDENTIALS ]{C.X}") for r, pw in creds[:10]: em = _rec_get(r, "email") or _rec_get(r, "username") or "—" src = _rec_get(r, "source") or "" masked = pw[:2] + "●" * min(len(pw) - 2, 8) if len(pw) > 2 else "●●●●" print(f" {C.R}→{C.X} {C.CY}{em}{C.X} {C.R}{masked}{C.X} {C.DM}[{src}]{C.X}") extra = REPL._record_assets(r) if extra: print(f" {extra}") if len(creds) > 10: print(f" {C.DM} … and {len(creds)-10} more — use 'export'{C.X}") if other_assets: print(f"\n {C.Y}[ DISCOVERED ASSETS ({len(other_assets)}) ]{C.X}") for r in other_assets[:15]: ident = _rec_get(r, "email") or _rec_get(r, "username") or "—" src = _rec_get(r, "source") or "" print(f" {C.Y}→{C.X} {C.CY}{ident}{C.X} {C.DM}← {src}{C.X}") extra = REPL._record_assets(r) if extra: print(f" {extra}") if len(other_assets) > 15: print(f" {C.DM} … and {len(other_assets)-15} more — use 'export'{C.X}") reused = analysis.get("reused_passwords", {}) if reused: print(f"\n {C.Y}[ PASSWORD REUSE ]{C.X}") for pw, cnt in list(reused.items())[:5]: masked = pw[:2] + "●" * (len(pw) - 2) if len(pw) > 2 else "●●●●" print(f" {C.R}⚠{C.X} {masked} → reused {cnt}× across breaches") dorks = full.get("dork_results", []) if dorks: print(f"\n {C.Y}[ DORK FINDINGS ({len(dorks)}) ]{C.X}") for d in dorks[:5]: url = d.get("url", "") or d.get("title", "") dork_q = d.get("dork", "")[:50] print(f" {C.Y}→{C.X} {C.DM}{url[:70]}{C.X}") if dork_q: print(f" {C.DM}dork: {dork_q}{C.X}") if len(dorks) > 5: print(f" {C.DM} … and {len(dorks)-5} more — use 'export'{C.X}") scrape = full.get("scrape_results", {}) or {} scraped_creds = scrape.get("credentials", []) tg = scrape.get("telegram", []) misconfigs = scrape.get("dork_misconfigs", []) pastes = scrape.get("pastes", []) if scraped_creds or tg or misconfigs or pastes: print(f"\n {C.Y}[ SCRAPE FINDINGS ]{C.X}") if pastes: print(f" Pastes : {C.P}{len(pastes)}{C.X}") for p in pastes[:3]: print(f" {C.P}→{C.X} [{p.get('site','')}] {p.get('id','')[:30]}") if scraped_creds: print(f" Credentials : {C.R}{len(scraped_creds)}{C.X}") for c in scraped_creds[:5]: print(f" {C.R}→{C.X} {c.get('raw','')[:70]}") if tg: print(f" Telegram : {C.CY}{len(tg)}{C.X}") for t in tg[:3]: print(f" {C.CY}→{C.X} [{t.get('channel','')}] {t.get('text','')[:60]}") if misconfigs: print(f" Misconfigs : {C.O}{len(misconfigs)}{C.X}") for m in misconfigs[:3]: print(f" {C.O}→{C.X} {m.get('title','')[:60]}") # ── Discovered Assets (flat provenance) ─────────────────────── discovered_assets = full.get("discovered_assets", []) or [] if discovered_assets: _phase_col = {"breach": C.R, "dork": C.O, "scrape": C.P, "hash_crack": C.P, "seed": C.G} print(f"\n {C.Y}[ DISCOVERED ASSETS ({len(discovered_assets)} new identifiers) ]{C.X}") print(f" {C.DM} {'ASSET':<38} {'TYPE':<10} {'PHASE':<10} REFERENCE{C.X}") for da in discovered_assets[:30]: pc = _phase_col.get(da["phase"], C.DM) ref = da.get("ref", "")[:55] print(f" {C.CY} {da['asset']:<38}{C.X} {C.DM}{da['qtype']:<10}{C.X} " f"{pc}{da['phase']:<10}{C.X} {C.DM}{ref}{C.X}") if len(discovered_assets) > 30: print(f" {C.DM} … and {len(discovered_assets)-30} more — use 'export'{C.X}") print(f"\n {C.G}{'━'*W}{C.X}") print(f" {C.DM}Use 'export pdf/html/json' for the full forensic report.{C.X}\n") def _pivot(self, arg: str) -> None: if not self._last: out("warn", "No results loaded. Run a scan first."); return if not arg or not arg.strip().isdigit(): out("warn", "Usage: pivot (see [pivot N] hints in graph output)"); return idx = int(arg.strip()) - 1 if not (0 <= idx < len(self._last)): out("warn", f"Index out of range. Valid: 1–{len(self._last)}"); return r = self._last[idx] seed = (_rec_get(r, "email") or _rec_get(r, "username") or _rec_get(r, "phone") or _rec_get(r, "domain") or "") if not seed: out("warn", "Selected record has no pivotable identifier."); return out("pivot", f"Pivoting → async fullscan on: {C.CY}{seed}{C.X}") self._fullscan(seed) def _visualize(self) -> None: """ ASCII Relationship Map: Target → Linked Data → Pivot Points. Shows the full investigation session chain and cross-links. """ full_data = self._last_full or {} if not self._last and self._last_full: self._last = self._last_full.get("records", []) if not full_data.get("target"): out("warn", "No results loaded. Run a scan, dork, or scrape first."); return if self._last is None: self._last = [] target = (self._last_full or {}).get("target", "unknown") chain = self.session_state.get("pivot_chain", []) scanned = self.session_state.get("targets_scanned", []) # Collect linked data emails, phones, usernames, addresses, passwords = ( set(), set(), set(), set(), set() ) source_map: Dict[str, str] = {} # value → source name for r in self._last: for attr, bucket in [("email", emails), ("phone", phones), ("username", usernames), ("password", passwords)]: v = _rec_get(r, attr) if v: bucket.add(v) source_map[v] = _rec_get(r, "source") or "" addr = getattr(r, "address", "") or "" if addr: addresses.add(addr) W = 70 print(f"\n {C.G}{'━'*W}{C.X}") print(f" {C.G} INVESTIGATION RELATIONSHIP MAP{C.X} " f"[{self.session_state.get('investigator_id','NOX-AUTO')}]") print(f" {C.G}{'━'*W}{C.X}\n") # Session pivot chain if len(chain) > 1: print(f" {C.Y}Pivot Chain:{C.X}") for i, t in enumerate(chain): arrow = " " if i == 0 else " ↳ " print(f" {C.DM}{arrow}{C.X}{C.CY}{t}{C.X}") print() # Central target node print(f" {C.G}◉{C.X} {C.BD}{target}{C.X}") # Linked data branches groups = [ ("Emails", sorted(emails)[:8], C.CY), ("Phones", sorted(phones)[:6], C.CY), ("Usernames", sorted(usernames)[:6], C.G), ("Addresses", sorted(addresses)[:4], C.Y), ("Passwords", sorted(passwords)[:5], C.R), ] active_groups = [(lbl, vals, col) for lbl, vals, col in groups if vals] for gi, (label, values, color) in enumerate(active_groups): is_last_group = (gi == len(active_groups) - 1) grp_pfx = " └─" if is_last_group else " ├─" cont_pfx = " " if is_last_group else " │ " print(f" {C.DM}{grp_pfx}{C.X} {C.P}[{label}]{C.X}") for vi, v in enumerate(values): is_last_val = (vi == len(values) - 1) val_pfx = f"{cont_pfx} └─" if is_last_val else f"{cont_pfx} ├─" src_tag = f" {C.DM}← {source_map.get(v,'')[:20]}{C.X}" if source_map.get(v) else "" # Mark as pivot point if it appears in scanned targets pivot_tag = f" {C.Y}[PIVOT]{C.X}" if v in scanned else "" print(f" {C.DM}{val_pfx}{C.X} {color}{v}{C.X}{src_tag}{pivot_tag}") # ── Dork results branch ─────────────────────────────────────── full_data = self._last_full or {} dork_results = full_data.get("dork_results", []) or [] if dork_results: print(f"\n {C.Y}◈ Dork Findings ({len(dork_results)}){C.X}") for d in dork_results[:8]: title = d.get("title","") or d.get("dork","") url = d.get("url","") print(f" {C.DM} ├─{C.X} {C.O}{title[:60]}{C.X}") if url: print(f" {C.DM} │ {url[:70]}{C.X}") if len(dork_results) > 8: print(f" {C.DM} └─ … and {len(dork_results)-8} more{C.X}") # ── Scrape results branch ───────────────────────────────────── scrape_results = full_data.get("scrape_results", {}) or {} pastes = scrape_results.get("pastes", []) creds_sc = scrape_results.get("credentials", []) tg_hits = scrape_results.get("telegram", []) mc_hits = scrape_results.get("dork_misconfigs", []) if pastes or creds_sc or tg_hits or mc_hits: total_scrape = len(pastes) + len(creds_sc) + len(tg_hits) + len(mc_hits) print(f"\n {C.P}◈ Scrape Findings ({total_scrape}){C.X}") if pastes: print(f" {C.DM} ├─{C.X} {C.P}[Pastes: {len(pastes)}]{C.X}") for p in pastes[:3]: print(f" {C.DM} │ ├─{C.X} [{p.get('site','')}] {p.get('id','')[:40]}") if len(pastes) > 3: print(f" {C.DM} │ └─ … and {len(pastes)-3} more{C.X}") if creds_sc: print(f" {C.DM} ├─{C.X} {C.R}[Credentials: {len(creds_sc)}]{C.X}") for c in creds_sc[:3]: print(f" {C.DM} │ ├─{C.X} {C.R}{c.get('raw','')[:60]}{C.X}") if len(creds_sc) > 3: print(f" {C.DM} │ └─ … and {len(creds_sc)-3} more{C.X}") if tg_hits: print(f" {C.DM} ├─{C.X} {C.CY}[Telegram: {len(tg_hits)}]{C.X}") for t in tg_hits[:3]: print(f" {C.DM} │ ├─{C.X} {C.CY}[{t.get('channel','')}]{C.X} {t.get('text','')[:50]}") if len(tg_hits) > 3: print(f" {C.DM} │ └─ … and {len(tg_hits)-3} more{C.X}") if mc_hits: print(f" {C.DM} └─{C.X} {C.O}[Misconfigs: {len(mc_hits)}]{C.X}") for m in mc_hits[:3]: print(f" {C.DM} ├─{C.X} {C.O}{m.get('title','')[:60]}{C.X}") if len(mc_hits) > 3: print(f" {C.DM} └─ … and {len(mc_hits)-3} more{C.X}") # ── Discovered / reinjected assets branch ──────────────────── discovered_assets = full_data.get("discovered_assets", []) or [] if discovered_assets: _phase_col = {"breach": C.R, "dork": C.O, "scrape": C.P, "hash_crack": C.P} print(f"\n {C.B}◈ Reinjected Assets ({len(discovered_assets)}){C.X}") for da in discovered_assets[:12]: pc = _phase_col.get(da["phase"], C.DM) ref = da.get("ref", "")[:50] print(f" {C.DM} ├─{C.X} {pc}[{da['phase']}]{C.X} " f"{C.CY}{da['asset']}{C.X} {C.DM}({da['qtype']}) ← {ref}{C.X}") if len(discovered_assets) > 12: print(f" {C.DM} └─ … and {len(discovered_assets)-12} more — use 'export'{C.X}") print(f"\n {C.G}{'━'*W}{C.X}") print(f" {C.DM}Targets scanned: {len(scanned)} | " f"Records: {len(self._last or [])} | " f"Tip: 'export --format pdf' for forensic report{C.X}\n") def _search(self, query: str) -> None: if not query: out("warn", "Usage: search "); return if not self._last: out("warn", "No results in memory. Run a scan first."); return q = query.lower() hits = [r for r in self._last if q in str(_rec_get(r, "email") or "").lower() or q in str(_rec_get(r, "username") or "").lower() or q in str(_rec_get(r, "password") or "").lower() or q in str(_rec_get(r, "domain") or "").lower() or q in str(_rec_get(r, "source") or "").lower()] if not hits: out("warn", f"No records match '{query}'."); return out("ok", f" {len(hits)} match(es) for '{query}':\n") for i, r in enumerate(hits[:30], 1): em = _rec_get(r, "email") or _rec_get(r, "username") or "—" pw = _rec_get(r, "password") ph = _rec_get(r, "phone") src = _rec_get(r, "source") or "" line = f" {C.DM}{i:3}.{C.W} {C.CY}{em}{C.W}" if pw: line += f" {C.R}pw:{pw}{C.W}" if ph: line += f" {C.CY}☎ {ph}{C.W}" if src: line += f" {C.DM}[{src}]{C.W}" print(line) print() @staticmethod def _record_assets(r: Any) -> str: """Return a compact string of every non-empty asset field in a record.""" parts = [] for label, key in [("ip", "ip_address"), ("phone", "phone"), ("domain", "domain"), ("name", "full_name"), ("addr", "address")]: v = _rec_get(r, key) if v: parts.append(f"{C.DM}{label}:{C.X}{v}") ph = _rec_get(r, "password_hash") ht = _rec_get(r, "hash_type") if ph and not _rec_get(r, "password"): parts.append(f"{C.DM}hash[{ht or '?'}]:{C.X}{ph[:20]}…") dt = _rec_get(r, "data_types") or [] if isinstance(dt, list) and dt: parts.append(f"{C.DM}[{', '.join(dt[:3])}]{C.X}") return " ".join(parts) def _print_summary(self, a: dict) -> None: if not a: return badge = self._risk_badge(a) print(f"\n {C.G}{'═'*55}{C.W}") print(f" {C.G}CTI RESULTS SUMMARY{C.W} {badge}") print(f" {C.G}{'═'*55}{C.W}") print(f" Total Records: {a.get('total_records',0)}") print(f" Unique (deduped): {a.get('unique_records',a.get('total_records',0))}") print(f" Unique Emails: {a.get('unique_emails',0)}") print(f" Passwords Found: {C.R}{a.get('passwords_found',0)}{C.W}") print(f" Stealer Logs: {C.R}{a.get('stealer_logs',0)}{C.W}") print(f" High-Value Targets: {C.O}{a.get('hvt_count',0)}{C.W}") print(f" Password Reuse: {len(a.get('reused_passwords',{}))}") print(f" Avg Persistence Score: {a.get('avg_persistence',0.0)}") # Show dork/scrape counts if available (autoscan) full = self._last_full or {} dork_count = len(full.get("dork_results", []) or []) scrape = full.get("scrape_results", {}) or {} paste_count = len(scrape.get("pastes", [])) cred_count = len(scrape.get("credentials", [])) tg_count = len(scrape.get("telegram", [])) mc_count = len(scrape.get("dork_misconfigs", [])) if dork_count: print(f" Dork Hits: {C.O}{dork_count}{C.W}") if paste_count or cred_count or tg_count or mc_count: print(f" Scraped Pastes: {C.P}{paste_count}{C.W}") if cred_count: print(f" Scraped Credentials: {C.R}{cred_count}{C.W}") if tg_count: print(f" Telegram Hits: {C.CY}{tg_count}{C.W}") if mc_count: print(f" Misconfigurations: {C.O}{mc_count}{C.W}") rs = a.get("risk_score",0) col = C.R if rs > 60 else C.Y if rs > 30 else C.G print(f" Risk Score: {col}{rs}/100{C.W}") sev = a.get("severity",{}) print(f"\n Severity: {C.R}■ {sev.get('critical',0)} CRITICAL{C.W} {C.Y}■ {sev.get('high',0)} HIGH{C.W} ■ {sev.get('medium',0)} MEDIUM") profiles = a.get("profiles",[]) if profiles: max_stuffing = max((p.get("stuffing_risk","LOW") for p in profiles), key=lambda x: {"LOW":0,"MEDIUM":1,"HIGH":2,"CRITICAL":3}.get(x,0), default="LOW") col = C.R if max_stuffing=="CRITICAL" else C.Y if max_stuffing in ("HIGH","MEDIUM") else C.G print(f" Credential Stuffing: {col}{max_stuffing}{C.W}") reused = a.get("reused_passwords",{}) if reused: print(f"\n {C.R}Password Reuse Detected:{C.W}") for pw, cnt in list(reused.items())[:5]: masked = pw[:2]+"*"*(len(pw)-2) if len(pw)>4 else "****" print(f" {masked} → used {cnt}x across breaches") # ======================================================================= # 1. API & SECRETS MANAGEMENT # ======================================================================= import configparser as _configparser class ConfigManager: """ Unified API key manager — delegates to sources/helpers/config_handler.py (XDG JSON store at ~/.config/nox-cli/apikeys.json) when available, with a legacy config.ini fallback. Resolution order: env-var → apikeys.json → config.ini → '' """ _cache: Dict[str, str] = {} _INI_PATHS = [HOME_NOX / "config.ini", Path("/etc/nox/config.ini")] _store_mtime: float = 0.0 @classmethod def _invalidate_if_changed(cls) -> None: """Clear the key cache if apikeys.json was modified externally.""" if not _HAS_CONFIG_HANDLER or _ExtConfigManager is None: return try: from sources.helpers.config_handler import _APIKEYS_FILE # type: ignore if _APIKEYS_FILE and _APIKEYS_FILE.exists(): mtime = _APIKEYS_FILE.stat().st_mtime if mtime != cls._store_mtime: cls._cache.clear() cls._store_mtime = mtime if _ExtConfigManager._store is not None: _ExtConfigManager._store = None _ExtConfigManager._cache.clear() except Exception: pass @classmethod def get(cls, key_name: str) -> str: cls._invalidate_if_changed() if key_name in cls._cache: return cls._cache[key_name] # 1. Delegate to external handler (XDG JSON store) if _HAS_CONFIG_HANDLER and _ExtConfigManager is not None: val = _ExtConfigManager.get(key_name) if val: cls._cache[key_name] = val return val # 2. Environment variable val = os.environ.get(key_name) or os.environ.get(f"NOX_{key_name}", "") # 3. Legacy config.ini if not val: for p in cls._INI_PATHS: if p.exists(): cfg = _configparser.ConfigParser() cfg.read(str(p)) val = cfg.get("api_keys", key_name, fallback="") if val: break if val == UNIVERSAL_PLACEHOLDER: val = "" cls._cache[key_name] = val return val @classmethod def write(cls, key_name: str, value: str) -> None: """Persist a key — prefers the XDG JSON store, falls back to config.ini.""" if _HAS_CONFIG_HANDLER and _ExtConfigManager is not None: _ExtConfigManager.set(key_name, value) cls._cache[key_name] = value return # Legacy: write to config.ini _write_path = HOME_NOX / "config.ini" _write_path.parent.mkdir(parents=True, exist_ok=True) cfg = _configparser.ConfigParser() if _write_path.exists(): cfg.read(str(_write_path)) if "api_keys" not in cfg: cfg["api_keys"] = {} cfg["api_keys"][key_name] = value with open(_write_path, "w") as fh: cfg.write(fh) cls._cache[key_name] = value # ======================================================================= # 2. EXTREME MODULARITY — JSON Source Engine # ======================================================================= class JSONSourceLoader(AsyncSource): """ Dynamically loads a custom breach source defined by a JSON file in ~/.nox/sources/. Each file must contain: { "name": "MySource", "url": "https://api.example.com/search?q={query}", "method": "GET", // or "POST" "headers": {"X-Key": "{api_key}"}, "payload": {}, // POST body template (optional) "api_key_env": "MY_API_KEY", // env-var / config.ini key (optional) "extract": { "mode": "json", // "json" or "regex" "root": "results", // JSON path to list (dot-separated) "email": "email", "password": "password", "username": "username", "phone": "phone", "hash": "hash" } } For regex mode, each field value is a regex pattern with one capture group. """ _SOURCES_DIR = SOURCE_DIR def __init__(self, semaphore: asyncio.Semaphore, db: "DB", config: "NoxConfig", definition: dict) -> None: super().__init__(semaphore, db, config) self._def = definition self.name = definition.get("name", "JSONSource") env_key = definition.get("api_key_env", "") self._api_key = ConfigManager.get(env_key) if env_key else "" self.needs_key = bool(env_key) self.ok_email = self.ok_user = self.ok_domain = self.ok_phone = True async def async_search(self, session, query: str, qtype: str) -> List[Record]: if self.needs_key and not self._api_key: logger.debug("JSONSourceLoader[%s]: API key missing, skipping.", self.name) return [] try: return await self._fetch(session, query) except Exception as exc: logger.debug("JSONSourceLoader[%s]: %s", self.name, exc) return [] async def _fetch(self, session, query: str) -> List[Record]: d = self._def url = d["url"].replace("{query}", urllib.parse.quote(query, safe="")).replace("{api_key}", self._api_key) headers = {k: v.replace("{api_key}", self._api_key) for k, v in d.get("headers", {}).items()} method = d.get("method", "GET").upper() payload = {k: v.replace("{query}", query).replace("{api_key}", self._api_key) for k, v in d.get("payload", {}).items()} if method == "POST": status, text, _ = await self._post(session, url, json_data=payload or None, data=payload if not payload else None, headers=headers) else: status, text, _ = await self._get(session, url, headers=headers) if status not in range(200, 300) or not text: return [] ext = d.get("extract", {}) mode = ext.get("mode", "json") if mode == "regex": return self._extract_regex(text, ext, query) return self._extract_json(text, ext, query) def _extract_json(self, text: str, ext: dict, query: str) -> List[Record]: try: data = json.loads(text) except Exception: return [] # Navigate to root list root_path = ext.get("root", "") for key in (root_path.split(".") if root_path else []): if isinstance(data, dict): data = data.get(key, []) if not isinstance(data, list): data = [data] if isinstance(data, dict) else [] records = [] for item in data[:100]: if not isinstance(item, dict): continue records.append(self._rec( email = str(item.get(ext.get("email", "email"), "") or ""), password = str(item.get(ext.get("password", "password"), "") or ""), username = str(item.get(ext.get("username", "username"), "") or ""), phone = str(item.get(ext.get("phone", "phone"), "") or ""), password_hash = str(item.get(ext.get("hash", "hash"), "") or ""), breach_name = self.name, data_types = [self.name, "Credentials"], raw_data = item, )) return records def _extract_regex(self, text: str, ext: dict, query: str) -> List[Record]: field_patterns = {f: ext[f] for f in ("email","password","username","phone","hash") if f in ext} # Find all matches per field field_values: Dict[str, List[str]] = {} for fname, pattern in field_patterns.items(): field_values[fname] = re.findall(pattern, text) # Zip into records (align by index) max_len = max((len(v) for v in field_values.values()), default=0) records = [] for i in range(min(max_len, 100)): records.append(self._rec( email = field_values.get("email", [""])[i] if i < len(field_values.get("email", [])) else "", password = field_values.get("password", [""])[i] if i < len(field_values.get("password", [])) else "", username = field_values.get("username", [""])[i] if i < len(field_values.get("username", [])) else "", phone = field_values.get("phone", [""])[i] if i < len(field_values.get("phone", [])) else "", password_hash = field_values.get("hash", [""])[i] if i < len(field_values.get("hash", [])) else "", breach_name = self.name, data_types = [self.name, "Credentials"], )) return records @classmethod def load_all(cls, semaphore: asyncio.Semaphore, db: "DB", config: "NoxConfig") -> List["JSONSourceLoader"]: """Scan ~/.nox/sources/ and return one loader per valid .json file.""" cls._SOURCES_DIR.mkdir(parents=True, exist_ok=True) loaders = [] for jf in cls._SOURCES_DIR.glob("*.json"): try: definition = json.loads(jf.read_text(encoding="utf-8")) loaders.append(cls(semaphore, db, config, definition)) logger.info("JSONSourceLoader: loaded %s", jf.name) except Exception as exc: logger.warning("JSONSourceLoader: failed to load %s — %s", jf.name, exc) return loaders # ======================================================================= # 3. DeHashEngine & ReputationEngine # ======================================================================= class DeHashEngine: """ Queries MD5/SHA1 hashes found during scans against de-hashing APIs. Requires DEHASHED_API_KEY (email:api_key format) or DEHASH_API_KEY. Gracefully skips if key is absent. """ def __init__(self, db: "DB", config: "NoxConfig") -> None: self._db = db self._config = config self._key = (ConfigManager.get("DEHASHED_API_KEY") or ConfigManager.get("DEHASH_API_KEY") or db.get_key("dehashed")) async def dehash_records(self, session, records: List[Record]) -> List[Record]: """Attempt to crack any unhashed passwords found in records.""" if not self._key: return records hashes = {r.password_hash for r in records if r.password_hash and not r.password} if not hashes: return records sem = asyncio.Semaphore(5) tasks = [self._lookup(session, sem, h) for h in list(hashes)[:20]] results = await asyncio.gather(*tasks, return_exceptions=True) crack_map: Dict[str, str] = {} for res in results: if isinstance(res, tuple): crack_map[res[0]] = res[1] for r in records: if r.password_hash in crack_map: r.password = crack_map[r.password_hash] r.data_types = list(set(r.data_types + ["DeHashed"])) return records async def _lookup(self, session, sem: asyncio.Semaphore, h: str): cached = self._db.get_plain(h) if cached: return (h, cached) try: auth = base64.b64encode(self._key.encode()).decode() if ":" in self._key else self._key url = f"https://api.dehashed.com/v2/search?query=hashed_password:{h}&size=1" hdrs = {"Accept": "application/json", "Authorization": f"Basic {auth}"} async with sem: to = aiohttp_mod.ClientTimeout(total=self._config.timeout) if aiohttp_mod else None async with session.get(url, headers=hdrs, timeout=to, ssl=_SSL_CTX) as resp: if resp.status == 200: data = await resp.json() for entry in data.get("entries", []): pw = entry.get("password", "") if pw: self._db.store_hash(h, "unknown", pw, "DeHashed") return (h, pw) except Exception as exc: logger.debug("DeHashEngine._lookup %s: %s", h[:16], exc) return (h, "") class ReputationEngine: """ Checks IP/Domain targets via VirusTotal. Requires VIRUSTOTAL_API_KEY. Gracefully skips if absent. """ _VT_URL = "https://www.virustotal.com/api/v3" def __init__(self, config: "NoxConfig") -> None: self._config = config self._key = (ConfigManager.get("VIRUSTOTAL_API_KEY") or ConfigManager.get("VT_API_KEY")) async def check(self, session, target: str, qtype: str) -> Optional[dict]: """Return VirusTotal summary dict or None if key missing / not applicable.""" if not self._key or qtype not in ("ip", "domain", "url"): return None try: if qtype == "ip": url = f"{self._VT_URL}/ip_addresses/{target}" elif qtype == "domain": url = f"{self._VT_URL}/domains/{target}" else: encoded = base64.urlsafe_b64encode(target.encode()).decode().rstrip("=") url = f"{self._VT_URL}/urls/{encoded}" hdrs = {"x-apikey": self._key} to = aiohttp_mod.ClientTimeout(total=self._config.timeout) if aiohttp_mod else None async with session.get(url, headers=hdrs, timeout=to, ssl=_SSL_CTX) as resp: if resp.status == 200: data = await resp.json() stats = (data.get("data", {}) .get("attributes", {}) .get("last_analysis_stats", {})) return { "target": target, "malicious": stats.get("malicious", 0), "suspicious": stats.get("suspicious", 0), "harmless": stats.get("harmless", 0), "source": "VirusTotal", } except Exception as exc: logger.debug("ReputationEngine.check %s: %s", target, exc) return None # ======================================================================= # 4. PROFESSIONAL PDF REPORTING (fpdf2) # ======================================================================= def _pdf_report(data: dict, path: str) -> None: """ Generate a professional PDF report using fpdf2. Layout: Title Page → Executive Summary → Entities Table → Raw Evidence. Falls back gracefully if fpdf2 is not installed. """ try: from fpdf import FPDF # type: ignore except ImportError: out("warn", "fpdf2 not installed. Run: pip install fpdf2") return records = data.get("records", []) target = data.get("target", "Unknown") ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S") summary = AdvancedReporter._build_summary(records) class _PDF(FPDF): def header(self): self.set_font("Helvetica", "B", 9) self.set_text_color(100, 100, 100) self.cell(0, 6, f"NOX Framework v{VERSION} | CONFIDENTIAL", align="R") self.ln(4) def footer(self): self.set_y(-12) self.set_font("Helvetica", "", 8) self.set_text_color(150, 150, 150) self.cell(0, 6, f"Page {self.page_no()}", align="C") pdf = _PDF(orientation="P", unit="mm", format="A4") pdf.set_auto_page_break(auto=True, margin=15) pdf.set_margins(15, 15, 15) # ── Title Page ──────────────────────────────────────────────────── pdf.add_page() pdf.set_fill_color(10, 10, 10) pdf.rect(0, 0, 210, 297, "F") pdf.set_y(80) pdf.set_font("Helvetica", "B", 32) pdf.set_text_color(0, 255, 65) pdf.cell(0, 14, "NOX FRAMEWORK REPORT", align="C") pdf.ln(10) pdf.set_font("Helvetica", "", 14) pdf.set_text_color(200, 200, 200) pdf.cell(0, 8, f"Target: {target}", align="C") pdf.ln(7) pdf.set_font("Helvetica", "", 11) pdf.set_text_color(150, 150, 150) pdf.cell(0, 7, f"Generated: {ts}", align="C") pdf.ln(5) pdf.cell(0, 7, "FOR AUTHORISED USE ONLY", align="C") # ── Executive Summary ───────────────────────────────────────────── pdf.add_page() pdf.set_fill_color(255, 255, 255) pdf.set_text_color(0, 0, 0) pdf.set_font("Helvetica", "B", 16) pdf.cell(0, 10, "Executive Summary", ln=True) pdf.set_draw_color(0, 200, 50) pdf.set_line_width(0.5) pdf.line(15, pdf.get_y(), 195, pdf.get_y()) pdf.ln(4) max_risk = max((float(_rec_get(r, "risk_score") or 0) for r in records), default=0.0) kpis = [ ("Compromised Identities", summary["total_identities"]), ("Total Records", summary["total_records"]), ("Stealer Logs", summary["stealer_count"]), ("High-Value Targets", summary["hvt_count"]), ("Max Risk Score", f"{max_risk:.1f} / 100"), ] pdf.set_font("Helvetica", "B", 10) for label, value in kpis: pdf.set_fill_color(245, 245, 245) pdf.cell(90, 8, label, border=1, fill=True) pdf.set_font("Helvetica", "", 10) pdf.cell(85, 8, str(value), border=1, ln=True) pdf.set_font("Helvetica", "B", 10) pdf.ln(6) # Risk distribution pdf.set_font("Helvetica", "B", 12) pdf.cell(0, 8, "Risk Distribution", ln=True) pdf.set_font("Helvetica", "B", 9) for col, w in [("Level", 40), ("Count", 30), ("Bar", 105)]: pdf.set_fill_color(30, 30, 30) pdf.set_text_color(255, 255, 255) pdf.cell(w, 7, col, border=1, fill=True) pdf.ln() pdf.set_text_color(0, 0, 0) total_b = max(sum(summary["buckets"].values()), 1) colours = {"Critical": (220,0,30), "High": (220,100,0), "Medium": (200,180,0), "Low": (0,150,50), "Info": (100,100,100)} for level, count in summary["buckets"].items(): pdf.set_font("Helvetica", "", 9) pdf.cell(40, 6, level, border=1) pdf.cell(30, 6, str(count), border=1) bar_w = int(count / total_b * 100) x, y = pdf.get_x(), pdf.get_y() pdf.cell(105, 6, "", border=1) if bar_w: r2, g2, b2 = colours.get(level, (100,100,100)) pdf.set_fill_color(r2, g2, b2) pdf.rect(x + 1, y + 1, bar_w, 4, "F") pdf.ln() pdf.ln(4) # HVT list if summary["hvt_list"]: pdf.set_font("Helvetica", "B", 12) pdf.cell(0, 8, f"High-Value Targets ({summary['hvt_count']})", ln=True) pdf.set_font("Helvetica", "", 9) for hvt in summary["hvt_list"][:20]: pdf.cell(0, 5, f" \u26a0 {hvt}", ln=True) pdf.ln(3) # ── Discovered Entities Table ───────────────────────────────────── pdf.add_page() pdf.set_font("Helvetica", "B", 16) pdf.cell(0, 10, "Discovered Entities", ln=True) pdf.set_draw_color(0, 200, 50) pdf.line(15, pdf.get_y(), 195, pdf.get_y()) pdf.ln(4) col_widths = [55, 40, 35, 25, 25] headers = ["Identity", "Source", "Breach", "Date", "Risk"] pdf.set_font("Helvetica", "B", 8) pdf.set_fill_color(30, 30, 30) pdf.set_text_color(255, 255, 255) for h, w in zip(headers, col_widths): pdf.cell(w, 7, h, border=1, fill=True) pdf.ln() pdf.set_text_color(0, 0, 0) for rec in records[:200]: ident = (_rec_get(rec, "email") or _rec_get(rec, "username") or "—")[:30] src = (_rec_get(rec, "source") or "")[:20] bn = (_rec_get(rec, "breach_name") or "")[:20] bd = (_rec_get(rec, "breach_date") or "")[:10] rs_v = f"{float(_rec_get(rec, 'risk_score') or 0):.1f}" risk = float(_rec_get(rec, "risk_score") or 0) if risk >= 90: pdf.set_fill_color(255, 220, 220) elif risk >= 70: pdf.set_fill_color(255, 240, 220) else: pdf.set_fill_color(255, 255, 255) pdf.set_font("Helvetica", "", 7) for val, w in zip([ident, src, bn, bd, rs_v], col_widths): pdf.cell(w, 5, val, border=1, fill=True) pdf.ln() # ── Raw Evidence ────────────────────────────────────────────────── pdf.add_page() pdf.set_font("Helvetica", "B", 16) pdf.set_text_color(0, 0, 0) pdf.cell(0, 10, "Raw Evidence — Passwords & Metadata", ln=True) pdf.set_draw_color(0, 200, 50) pdf.line(15, pdf.get_y(), 195, pdf.get_y()) pdf.ln(4) pdf.set_font("Courier", "", 7) for rec in records[:300]: pw = _rec_get(rec, "password") meta = getattr(rec, "metadata", {}) or {} if not pw and not meta: continue ident = (_rec_get(rec, "email") or _rec_get(rec, "username") or "—")[:40] line = f"{ident}" if pw: line += f" pw:{pw[:40]}" if meta.get("author"): line += f" author:{meta['author'][:20]}" pdf.cell(0, 4, line[:120], ln=True) pdf.output(path) out("ok", f"PDF report saved: {path}") # ======================================================================= # PLUGIN SYSTEM — Vault, FileSystemProvider, SourceOrchestrator # ======================================================================= import importlib as _importlib class Vault: """ Thin compatibility shim — delegates entirely to ConfigManager (apikeys.json). Canonical key store: ~/.config/nox-cli/apikeys.json (chmod 0600). """ _cache: Dict[str, str] = {} @classmethod def get(cls, key: str) -> str: if key in cls._cache: return cls._cache[key] val = ConfigManager.get(key) or "" cls._cache[key] = val return val @classmethod def set(cls, key: str, value: str, prefer_nox_dir: bool = True) -> None: ConfigManager.write(key, value) cls._cache[key] = value @classmethod def autodehash(cls, records: List["Record"], db: "DB") -> List["Record"]: """ AutoDehash hook: for any record with a hash but no plaintext, attempt a lookup via DEHASH_API_KEY if available. Uses the existing DB hash cache to avoid redundant API calls. """ key = cls.get("DEHASH_API_KEY") or cls.get("DEHASHED_API_KEY") if not key: return records for r in records: if r.password_hash and not r.password: cached = db.get_plain(r.password_hash) if cached: r.password = cached continue # Synchronous fallback lookup via requests/urllib try: auth = base64.b64encode(key.encode()).decode() if ":" in key else key url = (f"https://api.dehashed.com/v2/search" f"?query=hashed_password:{r.password_hash}&size=1") hdrs = {"Accept": "application/json", "Authorization": f"Basic {auth}", "User-Agent": "NOX Framework"} if requests: resp = requests.get(url, headers=hdrs, timeout=10, verify=True) data = resp.json() if resp.status_code == 200 else {} else: req = urllib.request.Request(url, headers=hdrs) raw = urllib.request.urlopen(req, timeout=10) data = json.loads(raw.read().decode()) for entry in data.get("entries", []): pw = entry.get("password", "") if pw: r.password = pw db.store_hash(r.password_hash, r.hash_type or "unknown", pw, "Vault/AutoDehash") break except Exception as exc: logger.debug("Vault.autodehash %s: %s", r.password_hash[:12], exc) return records class Config: """ General settings loader from config.ini. Lookup order: $HOME/.nox/config.ini → /etc/nox/config.ini. config.ini format: [settings] concurrency = 20 timeout = 30 stealth = true rate_limit_lo = 0.5 rate_limit_hi = 2.0 """ _INI_PATHS = [HOME_NOX / "config.ini", Path("/etc/nox/config.ini")] _cache: Dict[str, Any] = {} @classmethod def _ini_path(cls) -> Optional[Path]: for p in cls._INI_PATHS: if p.exists(): return p return None @classmethod def get(cls, key: str, default: Any = None) -> Any: if key in cls._cache: return cls._cache[key] ini = cls._ini_path() if ini: cp = _configparser.ConfigParser() cp.read(str(ini)) val = cp.get("settings", key, fallback=None) if val is not None: # Auto-cast booleans and numbers if val.lower() in ("true", "false"): val = val.lower() == "true" else: try: val = int(val) except ValueError: try: val = float(val) except ValueError: pass cls._cache[key] = val return val cls._cache[key] = default return default @classmethod def apply(cls, nox_config: "NoxConfig") -> "NoxConfig": """Overlay config.ini values onto a NoxConfig instance.""" if not cls._ini_path(): return nox_config nox_config.concurrency = nox_config.max_threads = cls.get("concurrency", nox_config.concurrency) nox_config.timeout = cls.get("timeout", nox_config.timeout) nox_config.stealth = cls.get("stealth", nox_config.stealth) lo = cls.get("rate_limit_lo", nox_config.rate_limit[0]) hi = cls.get("rate_limit_hi", nox_config.rate_limit[1]) nox_config.rate_limit = (lo, hi) return nox_config class FileSystemProvider(AsyncSource): """ Loads a single breach source from a JSON definition file in ~/.config/nox/providers/. JSON schema: { "name": "MySource", "api_url": "https://api.example.com/search?q={query}", "request_type": "GET", "headers": {"Authorization": "Bearer {api_key}"}, "payload": {}, "regex_pattern": "(\\S+@\\S+):(\\S+)", // optional; groups: email, password "json_root": "results", // dot-path to list in JSON response "field_map": {"email":"email","password":"password"}, "required_api_key_name": "MY_SOURCE_API_KEY" // Vault key name } """ PROVIDERS_DIR = HOME_NOX / "providers" def __init__(self, semaphore: asyncio.Semaphore, db: "DB", config: "NoxConfig", definition: dict) -> None: super().__init__(semaphore, db, config) self._def = definition self.name = definition.get("name", "FSProvider") key_name = definition.get("required_api_key_name", "") self._api_key = Vault.get(key_name) if key_name else "" self.needs_key = bool(key_name) self.ok_email = self.ok_user = self.ok_domain = self.ok_phone = True async def async_search(self, session, query: str, qtype: str) -> List[Record]: if self.needs_key and not self._api_key: logger.debug("FileSystemProvider[%s]: key missing, skipping.", self.name) return [] try: return await self._fetch(session, query) except Exception as exc: logger.debug("FileSystemProvider[%s]: %s", self.name, exc) return [] async def _fetch(self, session, query: str) -> List[Record]: d = self._def url = (d["api_url"] .replace("{query}", urllib.parse.quote(query, safe="")) .replace("{api_key}", self._api_key)) hdrs = {k: v.replace("{api_key}", self._api_key) for k, v in d.get("headers", {}).items()} method = d.get("request_type", "GET").upper() payload = {k: v.replace("{query}", query).replace("{api_key}", self._api_key) for k, v in d.get("payload", {}).items()} if method == "POST": status, text, _ = await self._post(session, url, json_data=payload or None, headers=hdrs) else: status, text, _ = await self._get(session, url, headers=hdrs) if status not in range(200, 300) or not text: return [] regex = d.get("regex_pattern", "") if regex: return self._by_regex(text, regex) return self._by_json(text, d.get("json_root", ""), d.get("field_map", {})) def _by_regex(self, text: str, pattern: str) -> List[Record]: records = [] for m in re.finditer(pattern, text): groups = m.groups() records.append(self._rec( email = groups[0] if len(groups) > 0 else "", password = groups[1] if len(groups) > 1 else "", breach_name = self.name, data_types = [self.name, "Credentials"], )) return records[:100] def _by_json(self, text: str, root: str, field_map: dict) -> List[Record]: try: data = json.loads(text) except Exception: return [] for key in (root.split(".") if root else []): if isinstance(data, dict): data = data.get(key, []) if not isinstance(data, list): data = [data] if isinstance(data, dict) else [] records = [] for item in data[:100]: if not isinstance(item, dict): continue records.append(self._rec( email = str(item.get(field_map.get("email", "email"), "") or ""), password = str(item.get(field_map.get("password", "password"), "") or ""), username = str(item.get(field_map.get("username", "username"), "") or ""), phone = str(item.get(field_map.get("phone", "phone"), "") or ""), password_hash = str(item.get(field_map.get("hash", "hash"), "") or ""), breach_name = self.name, data_types = [self.name, "Credentials"], raw_data = item, )) return records @classmethod def load_all(cls, semaphore: asyncio.Semaphore, db: "DB", config: "NoxConfig") -> List["FileSystemProvider"]: cls.PROVIDERS_DIR.mkdir(parents=True, exist_ok=True) providers = [] for jf in cls.PROVIDERS_DIR.glob("*.json"): try: defn = json.loads(jf.read_text(encoding="utf-8")) providers.append(cls(semaphore, db, config, defn)) logger.info("FileSystemProvider: loaded %s", jf.name) except Exception as exc: logger.warning("FileSystemProvider: failed %s — %s", jf.name, exc) return providers class NoxSourceProvider(FileSystemProvider): """ Extended FileSystemProvider that handles the build_sources.py JSON schema: - Headers already have keys resolved (passed via _slot_keys) - Supports input_type filtering (skip source if query type doesn't match) - Handles api_key_slots rotation """ def __init__(self, semaphore: asyncio.Semaphore, db: "DB", config: "NoxConfig", definition: dict) -> None: super().__init__(semaphore, db, config, definition) self._input_type = definition.get("input_type", "") self._slot_keys = definition.get("_slot_keys", {}) self._confidence = definition.get("confidence", 0.5) # For sources with api_key_slots, check if any key is configured slots = definition.get("api_key_slots", []) if slots and not self._api_key: # Try each slot for slot in slots: key_name = slot.strip("{}") val = ConfigManager.get(key_name) if val: self._api_key = val break self.needs_key = bool(slots) async def async_search(self, session, query: str, qtype: str) -> List[Record]: # Filter by input_type if specified ('any' or '' means accept all qtypes) if self._input_type and self._input_type != "any" and qtype and self._input_type != qtype: return [] if self.needs_key and not self._api_key: logger.debug("NoxSourceProvider[%s]: key missing, skipping.", self.name) return [] try: return await self._fetch(session, query) except Exception as exc: logger.debug("NoxSourceProvider[%s]: %s", self.name, exc) return [] async def _fetch(self, session, query: str) -> List[Record]: d = self._def # Apply optional query transform before URL substitution. # Currently supported: "md5_lower" — MD5-hex of the lowercased, stripped query. transform = d.get("query_transform", "") if transform == "md5_lower": import hashlib as _hl try: effective_query = _hl.md5(query.lower().strip().encode(), usedforsecurity=False).hexdigest() except TypeError: effective_query = _hl.md5(query.lower().strip().encode()).hexdigest() elif transform == "fofa_domain": import base64 as _b64 effective_query = _b64.b64encode( f'domain="{query.lower().strip()}"'.encode() ).decode() else: effective_query = query # Headers are already resolved in _load_nox_sources; just substitute {query} hdrs = {k: v.replace("{query}", urllib.parse.quote(effective_query, safe="")) for k, v in d.get("headers", {}).items()} url = (d["api_url"] .replace("{query}", urllib.parse.quote(effective_query, safe="")) .replace("{api_key}", self._api_key or "")) # Also substitute any remaining {KEY_NAME} placeholders in URL for slot_name, slot_val in self._slot_keys.items(): url = url.replace(f"{{{slot_name}}}", slot_val or "") method = d.get("request_type", "GET").upper() def _sub(obj): """Recursively substitute {query} in payload (handles nested dicts/lists).""" if isinstance(obj, str): return obj.replace("{query}", effective_query).replace("{target}", effective_query) if isinstance(obj, dict): return {k: _sub(v) for k, v in obj.items()} if isinstance(obj, list): return [_sub(v) for v in obj] return obj payload = _sub(d.get("payload") or {}) if method == "POST": status, text, _ = await self._post(session, url, json_data=payload or None, headers=hdrs) else: status, text, _ = await self._get(session, url, headers=hdrs) # If the primary endpoint fails, try backup_endpoints in order. if status not in range(200, 300) or not text: for backup in (d.get("backup_endpoints") or []): backup_url = (backup .replace("{query}", urllib.parse.quote(query, safe="")) .replace("{target}", urllib.parse.quote(query, safe=""))) for slot_name, slot_val in self._slot_keys.items(): backup_url = backup_url.replace(f"{{{slot_name}}}", slot_val or "") if method == "POST": status, text, _ = await self._post(session, backup_url, json_data=payload or None, headers=hdrs) else: status, text, _ = await self._get(session, backup_url, headers=hdrs) if status in range(200, 300) and text: break # as a job submission, extract the job ID via poll_id_field, then poll # poll_endpoint?= until results arrive. poll_endpoint = d.get("poll_endpoint", "") if poll_endpoint: try: job_id = json.loads(text).get(d.get("poll_id_field", "id")) except Exception: job_id = None if not job_id: return [] poll_param = d.get("poll_id_param", "id") poll_root = d.get("poll_json_root", d.get("json_root", "")) poll_url = f"{poll_endpoint}?{poll_param}={job_id}" delay = 2 for _ in range(4): await asyncio.sleep(delay) p_status, p_text, _ = await self._get(session, poll_url, headers=hdrs) if p_status not in range(200, 300) or not p_text: delay = min(delay * 2, 16) continue try: items = json.loads(p_text) for key in (poll_root.split(".") if poll_root else []): if isinstance(items, dict): items = items.get(key, []) if isinstance(items, list) and items: return self._by_json(p_text, poll_root, d.get("field_map", {})) except Exception: pass delay = min(delay * 2, 16) return [] regex = d.get("regex_pattern", "") if regex: return self._by_regex(text, regex) return self._by_json(text, d.get("json_root", ""), d.get("field_map", {})) class SourceOrchestrator: """ Plugin-based source manager — 100% dynamic, zero hardcoded sources. Loads all intelligence sources exclusively from: 1. ~/.nox/sources/*.json — primary plugin directory (build_sources.py output) 2. ~/.nox/providers/*.json — extended FileSystemProvider plugins 3. ~/.nox/providers/plugin_*.py — dynamic importlib plugins FATAL if sources/ is empty: prints a clear error and aborts the scan. """ # Spec-required path: ~/.nox/sources/ SOURCES_DIR = SOURCE_DIR def __init__(self, semaphore: asyncio.Semaphore, db: "DB", config: "NoxConfig") -> None: self._sem = semaphore self._db = db self._config = config self._nox_sources: List[AsyncSource] = [] # from ~/.nox/sources/ self._fs_providers: List[AsyncSource] = [] # from ~/.nox/providers/ self._py_providers: List[AsyncSource] = [] # importlib .py plugins self._loaded = False def _ensure_loaded(self) -> None: if self._loaded: return self._nox_sources = self._load_nox_sources() self._fs_providers = FileSystemProvider.load_all(self._sem, self._db, self._config) self._py_providers = self._load_py_plugins() self._loaded = True total = len(self._nox_sources) + len(self._fs_providers) + len(self._py_providers) if total == 0: print( f"\n {C.BD}{C.R}[FATAL] No JSON plugins found in sources/. " f"Please run build_sources.py first.{C.X}\n" ) logger.critical("[FATAL] No JSON plugins found in sources/. Run build_sources.py.") def _load_nox_sources(self) -> List[AsyncSource]: """ Scan ~/.nox/sources/*.json. Handles both the build_sources.py schema (endpoint/{target}, normalization_map, selectors, api_key_slots) and the legacy FileSystemProvider schema (api_url/{query}, field_map, json_root). """ self.SOURCES_DIR.mkdir(parents=True, exist_ok=True) json_files = list(self.SOURCES_DIR.glob("*.json")) if not json_files: return [] sources: List[AsyncSource] = [] for jf in json_files: try: raw = json.loads(jf.read_text(encoding="utf-8")) slots = raw.get("api_key_slots", []) # Derive primary key name from slots (strip {}) derived_key_name = ( raw.get("required_api_key_name", "") or (slots[0].strip("{}") if slots else "") ) # Resolve all key names from slots for header substitution slot_keys = {s.strip("{}"): ConfigManager.get(s.strip("{}")) for s in slots} # Build headers: replace {KEY_NAME} placeholders with actual key values raw_headers = raw.get("headers", {}) resolved_headers = {} for k, v in raw_headers.items(): for slot_name, slot_val in slot_keys.items(): v = v.replace(f"{{{slot_name}}}", slot_val or "") resolved_headers[k] = v # Normalise endpoint: {target} → {query} for FileSystemProvider compat endpoint = raw.get("endpoint", raw.get("api_url", "")) endpoint = endpoint.replace("{target}", "{query}") # Build field_map from normalization_map (inverted: output_field → source_field) norm_map = raw.get("normalization_map", {}) field_map = raw.get("field_map", {}) if norm_map and not field_map: # normalization_map: {"email": "email_address"} means source field "email_address" → our "email" field_map = {our_field: src_field for our_field, src_field in norm_map.items() if our_field in ("email", "password", "username", "phone", "hash")} # json_root from selectors (e.g. "$.entries" → "entries") selectors = raw.get("selectors", {}) json_root = raw.get("json_root", "") if not json_root and selectors: # Take first selector value, strip "$." prefix first_sel = next(iter(selectors.values()), "") if first_sel.startswith("$."): # Handle "$.entries" → "entries", "$.*.Name" → "" (complex path, skip) parts = first_sel[2:].split(".") json_root = parts[0] if len(parts) == 1 else "" defn = { "name": raw.get("name", jf.stem), "api_url": endpoint, "request_type": raw.get("method", raw.get("request_type", "GET")), "headers": resolved_headers, "regex_pattern": raw.get("regex_pattern", ""), "json_root": json_root, "field_map": field_map, "required_api_key_name": derived_key_name, "api_key_slots": slots, "input_type": raw.get("input_type", ""), "output_type": raw.get("output_type", []), "pivot_types": raw.get("pivot_types", []), "confidence": raw.get("confidence", 0.5), # payload_template → payload for POST sources "payload": raw.get("payload_template") or raw.get("payload") or {}, # Pass resolved slot keys so FileSystemProvider can use them "_slot_keys": slot_keys, # Two-phase poll support "poll_endpoint": raw.get("poll_endpoint", ""), "poll_id_field": raw.get("poll_id_field", "id"), "poll_id_param": raw.get("poll_id_param", "id"), "poll_json_root": raw.get("poll_json_root", ""), "backup_endpoints": raw.get("backup_endpoints", []), "query_transform": raw.get("query_transform", ""), } inst = NoxSourceProvider(self._sem, self._db, self._config, defn) inst._bypass_required = raw.get("bypass_required") or [] sources.append(inst) logger.debug("SourceOrchestrator: loaded %s", jf.name) except Exception as exc: logger.warning("SourceOrchestrator: failed %s — %s", jf.name, exc) logger.info("SourceOrchestrator: loaded %d sources from sources/", len(sources)) return sources def _load_py_plugins(self) -> List[AsyncSource]: """Dynamically import plugin_*.py files via importlib.""" plugins: List[AsyncSource] = [] for py_file in FileSystemProvider.PROVIDERS_DIR.glob("plugin_*.py"): try: spec = _importlib.util.spec_from_file_location(py_file.stem, py_file) module = _importlib.util.module_from_spec(spec) spec.loader.exec_module(module) if hasattr(module, "create"): inst = module.create(self._sem, self._db, self._config) if isinstance(inst, list): plugins.extend(inst) elif inst is not None: plugins.append(inst) logger.info("SourceOrchestrator: loaded plugin %s", py_file.name) except Exception as exc: logger.warning("SourceOrchestrator: plugin %s failed — %s", py_file.name, exc) return plugins def get_sources(self, session: "Session", qtype: str) -> List[AsyncSource]: """Return plugin sources applicable to qtype, pre-filtered to avoid creating unnecessary tasks.""" self._ensure_loaded() # curl_cffi presence cached in OPTIONAL after first _try_import call _has_cffi = "curl_cffi" in OPTIONAL or _try_import("curl_cffi") is not None sources: List[AsyncSource] = [] for src in self._nox_sources: bypass = getattr(src, "_bypass_required", []) or [] if "cloudflare" in bypass and not _has_cffi: logger.debug("Skipping %s — cloudflare bypass required, curl_cffi absent", src.name) continue input_type = getattr(src, "_input_type", "") if not input_type or input_type == "any" or not qtype or input_type == qtype: sources.append(src) sources.extend(self._fs_providers) sources.extend(self._py_providers) return sources def plugin_count(self) -> int: self._ensure_loaded() return len(self._nox_sources) + len(self._fs_providers) + len(self._py_providers) # ======================================================================= # FORENSIC REPORTER (fpdf2) # ======================================================================= def _pdf_safe(s: str, maxlen: int = 200) -> str: """ Sanitise a string for fpdf2 core fonts (latin-1 subset). 1. Strip control characters and binary garbage. 2. Replace non-latin-1 characters with '?' to prevent UnicodeEncodeError. 3. Truncate to maxlen to prevent cell overflow. """ if not s: return "" # Strip control chars (same regex as AdvancedReporter._CTRL_RE) s = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x9f]", "", s) return s[:maxlen].encode("latin-1", errors="replace").decode("latin-1") class ForensicReporter: """ Professional forensic PDF report using fpdf2. Sections: 1. Case Metadata — Timestamp, Investigator ID, Target 2. Executive Summary — Risk Score (0–10 scale), severity breakdown 3. Categorized Findings — Credentials, PII, Dorked Documents 4. Dork Results — URL, snippet, dork query, engine 5. Scrape Results — Pastes (with links), extracted credentials, Telegram CTI, misconfigs 6. Identity Graph — ASCII relationship map """ @staticmethod def generate(data: dict, path: str, investigator_id: str = "NOX-AUTO") -> None: try: from fpdf import FPDF # type: ignore except ImportError: out("warn", "fpdf2 not installed. Run: pip install fpdf2") return records = data.get("records", []) target = data.get("target", "Unknown") ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S UTC") summary = AdvancedReporter._build_summary(records) # Risk score normalised to 0–10 max_risk = max((float(_rec_get(r, "risk_score") or 0) for r in records), default=0.0) risk_10 = round(max_risk / 10, 1) # Categorise findings credentials = [r for r in records if _rec_get(r, "password") or _rec_get(r, "password_hash")] pii = [r for r in records if _rec_get(r, "phone") or _rec_get(r, "name") or getattr(r, "address", "")] dorked = [r for r in records if _rec_get(r, "source") == "DorkingEngine"] class _PDF(FPDF): def header(self): self.set_font("Helvetica", "B", 8) self.set_text_color(120, 120, 120) self.cell(0, 5, "NOX Framework - FORENSIC REPORT - CONFIDENTIAL", align="R") self.ln(3) def footer(self): self.set_y(-12) self.set_font("Helvetica", "", 8) self.set_text_color(150, 150, 150) self.cell(0, 5, _pdf_safe(f"Page {self.page_no()} | Case: {target[:40]}"), align="C") pdf = _PDF(orientation="P", unit="mm", format="A4") pdf.set_auto_page_break(auto=True, margin=15) pdf.set_margins(15, 15, 15) # ── 1. Case Metadata ───────────────────────────────────────── pdf.add_page() pdf.set_fill_color(15, 15, 15) pdf.rect(0, 0, 210, 297, "F") pdf.set_y(70) pdf.set_font("Helvetica", "B", 28) pdf.set_text_color(0, 220, 60) pdf.cell(0, 12, "FORENSIC INTELLIGENCE REPORT", align="C") pdf.ln(8) pdf.set_font("Helvetica", "B", 14) pdf.set_text_color(200, 200, 200) pdf.cell(0, 8, _pdf_safe(f"Target: {target}"), align="C") pdf.ln(6) pdf.set_font("Helvetica", "", 11) pdf.set_text_color(140, 140, 140) for line in [f"Timestamp: {ts}", f"Investigator ID: {investigator_id}", f"Framework: NOX Framework v{VERSION}", "Classification: RESTRICTED - Authorised Use Only"]: pdf.cell(0, 6, _pdf_safe(line), align="C") pdf.ln(5) # ── 2. Executive Summary ───────────────────────────────────── pdf.add_page() pdf.set_fill_color(255, 255, 255) pdf.set_text_color(0, 0, 0) pdf.set_font("Helvetica", "B", 16) pdf.cell(0, 10, "Executive Summary", ln=True) pdf.set_draw_color(0, 180, 50) pdf.set_line_width(0.4) pdf.line(15, pdf.get_y(), 195, pdf.get_y()) pdf.ln(4) # Risk score gauge (0–10) risk_colour = (200, 0, 30) if risk_10 >= 8 else (220, 110, 0) if risk_10 >= 5 else (0, 160, 50) pdf.set_font("Helvetica", "B", 11) kpis = [ ("Risk Score (0-10)", f"{risk_10} {'#' * int(risk_10)}{'-' * (10 - int(risk_10))}"), ("Compromised Identities", str(summary["total_identities"])), ("Total Records", str(summary["total_records"])), ("Stealer Logs", str(summary["stealer_count"])), ("High-Value Targets", str(summary["hvt_count"])), ("Credential Records", str(len(credentials))), ("PII Records", str(len(pii))), ("Dorked Documents", str(len(dorked))), ] for label, value in kpis: pdf.set_fill_color(245, 245, 245) pdf.cell(90, 7, _pdf_safe(label), border=1, fill=True) if label.startswith("Risk"): pdf.set_text_color(*risk_colour) pdf.set_font("Helvetica", "", 10) pdf.cell(85, 7, _pdf_safe(value), border=1, ln=True) pdf.set_text_color(0, 0, 0) pdf.set_font("Helvetica", "B", 11) pdf.ln(5) # Severity breakdown pdf.set_font("Helvetica", "B", 12) pdf.cell(0, 8, "Severity Breakdown", ln=True) _sev_colours = {"Critical":(220,0,30),"High":(220,100,0), "Medium":(200,180,0),"Low":(0,150,50),"Info":(100,100,100)} total_b = max(sum(summary["buckets"].values()), 1) for level, count in summary["buckets"].items(): pdf.set_font("Helvetica", "", 9) pdf.cell(35, 6, _pdf_safe(level), border=1) pdf.cell(20, 6, str(count), border=1) bar_w = int(count / total_b * 120) x, y = pdf.get_x(), pdf.get_y() pdf.cell(125, 6, "", border=1) if bar_w: rc, gc, bc = _sev_colours.get(level, (100,100,100)) pdf.set_fill_color(rc, gc, bc) pdf.rect(x + 1, y + 1, bar_w, 4, "F") pdf.ln() # ── 3. Categorized Findings ────────────────────────────────── for section_title, section_records, cols in [ ("Credentials", credentials[:150], [("Identity", 55), ("Password", 45), ("Source", 35), ("Risk", 20), ("Date", 25)]), ("PII Records", pii[:100], [("Identity", 55), ("Phone", 35), ("Name", 40), ("Source", 30), ("Risk", 20)]), ("Dorked Documents", dorked[:80], [("URL", 100), ("Author", 40), ("Type", 20), ("Risk", 20)]), ]: if not section_records: continue pdf.add_page() pdf.set_font("Helvetica", "B", 14) pdf.set_text_color(0, 0, 0) pdf.cell(0, 9, _pdf_safe(f"Findings - {section_title}"), ln=True) pdf.line(15, pdf.get_y(), 195, pdf.get_y()) pdf.ln(3) # Header row pdf.set_font("Helvetica", "B", 8) pdf.set_fill_color(30, 30, 30) pdf.set_text_color(255, 255, 255) for col_name, col_w in cols: pdf.cell(col_w, 6, col_name, border=1, fill=True) pdf.ln() pdf.set_text_color(0, 0, 0) for rec in section_records: rs = float(_rec_get(rec, "risk_score") or 0) pdf.set_fill_color(255, 230, 230) if rs >= 90 else \ pdf.set_fill_color(255, 245, 230) if rs >= 70 else \ pdf.set_fill_color(255, 255, 255) pdf.set_font("Helvetica", "", 7) ident = _pdf_safe(_rec_get(rec, "email") or _rec_get(rec, "username") or "-", 35) src = _pdf_safe(_rec_get(rec, "source") or "", 20) rs_s = f"{rs:.0f}" bd = _pdf_safe(_rec_get(rec, "breach_date") or "", 10) if section_title == "Credentials": pw = _pdf_safe(_rec_get(rec, "password") or _rec_get(rec, "password_hash") or "", 30) for val, w in zip([ident, pw, src, rs_s, bd], [c[1] for c in cols]): pdf.cell(w, 5, val, border=1, fill=True) elif section_title == "PII Records": ph = _pdf_safe(_rec_get(rec, "phone") or "", 20) name = _pdf_safe(_rec_get(rec, "name") or getattr(rec, "full_name", "") or "", 25) for val, w in zip([ident, ph, name, src, rs_s], [c[1] for c in cols]): pdf.cell(w, 5, val, border=1, fill=True) else: # Dorked meta = getattr(rec, "metadata", {}) or {} rd = getattr(rec, "raw_data", {}) or {} url = _pdf_safe(rd.get("url", "") if isinstance(rd, dict) else "", 65) auth = _pdf_safe(meta.get("author", ""), 25) ext = _pdf_safe((url.rsplit(".", 1)[-1].split("?")[0] if "." in url else ""), 10) for val, w in zip([url, auth, ext, rs_s], [c[1] for c in cols]): pdf.cell(w, 5, val, border=1, fill=True) pdf.ln() # ── 4. Dork Results ────────────────────────────────────────── dork_results = data.get("dork_results", []) or [] if dork_results: pdf.add_page() pdf.set_font("Helvetica", "B", 14) pdf.set_text_color(0, 0, 0) pdf.cell(0, 9, _pdf_safe(f"Dork Results ({len(dork_results)} hits)"), ln=True) pdf.line(15, pdf.get_y(), 195, pdf.get_y()) pdf.ln(3) pdf.set_font("Helvetica", "B", 8) pdf.set_fill_color(30, 30, 30) pdf.set_text_color(255, 255, 255) for col_name, col_w in [("URL / Title", 90), ("Snippet", 55), ("Engine", 20), ("Dork Query", 15)]: pdf.cell(col_w, 6, col_name, border=1, fill=True) pdf.ln() pdf.set_text_color(0, 0, 0) for h in dork_results[:200]: pdf.set_fill_color(245, 245, 255) pdf.set_font("Helvetica", "", 7) url = _pdf_safe(h.get("url", h.get("title", "")), 60) snippet = _pdf_safe(h.get("snippet", ""), 38) engine = _pdf_safe(h.get("engine", ""), 12) dork_q = _pdf_safe(h.get("dork", ""), 12) for val, w in zip([url, snippet, engine, dork_q], [90, 55, 20, 15]): pdf.cell(w, 5, val, border=1, fill=True) pdf.ln() # ── 5. Scrape Results ──────────────────────────────────────── scrape_results = data.get("scrape_results", {}) or {} pastes = scrape_results.get("pastes", []) creds_sc = scrape_results.get("credentials", []) tg_hits = scrape_results.get("telegram", []) mc_hits = scrape_results.get("dork_misconfigs", []) if pastes or creds_sc or tg_hits or mc_hits: pdf.add_page() pdf.set_font("Helvetica", "B", 14) pdf.set_text_color(0, 0, 0) pdf.cell(0, 9, "Scrape Results", ln=True) pdf.line(15, pdf.get_y(), 195, pdf.get_y()) pdf.ln(3) paste_links = { "Pastebin": "https://pastebin.com/{}", "Rentry": "https://rentry.co/{}", "Hastebin": "https://hastebin.com/{}", "DPaste": "https://dpaste.org/{}", "Ghostbin": "https://ghostbin.com/paste/{}", "JustPaste":"https://justpaste.it/{}", "ControlC": "https://controlc.com/{}", "Paste2": "https://paste2.org/raw/{}", "PastebinPro": "https://pastebin.com/{}", } if pastes: pdf.set_font("Helvetica", "B", 10) pdf.cell(0, 7, _pdf_safe(f"Pastes ({len(pastes)})"), ln=True) pdf.set_font("Helvetica", "B", 8) pdf.set_fill_color(30, 30, 30); pdf.set_text_color(255, 255, 255) for col_name, col_w in [("Site", 25), ("Paste ID / Link", 80), ("Patterns Found", 75)]: pdf.cell(col_w, 6, col_name, border=1, fill=True) pdf.ln(); pdf.set_text_color(0, 0, 0) for p in pastes[:100]: pdf.set_fill_color(245, 245, 245); pdf.set_font("Helvetica", "", 7) site = _pdf_safe(p.get("site", ""), 15) pid = p.get("id", "") tmpl = paste_links.get(p.get("site", ""), "") link = _pdf_safe(tmpl.format(pid) if tmpl and pid else pid, 55) pats = _pdf_safe(", ".join(f"{k}({len(v)})" for k, v in (p.get("patterns") or {}).items()), 50) for val, w in zip([site, link, pats], [25, 80, 75]): pdf.cell(w, 5, val, border=1, fill=True) pdf.ln() pdf.ln(3) if creds_sc: pdf.set_font("Helvetica", "B", 10) pdf.cell(0, 7, _pdf_safe(f"Extracted Credentials ({len(creds_sc)})"), ln=True) pdf.set_font("Helvetica", "B", 8) pdf.set_fill_color(30, 30, 30); pdf.set_text_color(255, 255, 255) for col_name, col_w in [("Raw Credential", 120), ("Source", 30), ("Paste ID", 30)]: pdf.cell(col_w, 6, col_name, border=1, fill=True) pdf.ln(); pdf.set_text_color(0, 0, 0) for c in creds_sc[:150]: pdf.set_fill_color(255, 240, 240); pdf.set_font("Helvetica", "", 7) raw = _pdf_safe(c.get("raw", ""), 80) src = _pdf_safe(c.get("source", ""), 20) pid = _pdf_safe(c.get("paste_id", ""), 20) for val, w in zip([raw, src, pid], [120, 30, 30]): pdf.cell(w, 5, val, border=1, fill=True) pdf.ln() pdf.ln(3) if tg_hits: pdf.set_font("Helvetica", "B", 10) pdf.cell(0, 7, _pdf_safe(f"Telegram CTI ({len(tg_hits)})"), ln=True) pdf.set_font("Helvetica", "B", 8) pdf.set_fill_color(30, 30, 30); pdf.set_text_color(255, 255, 255) for col_name, col_w in [("Channel / Link", 50), ("Message Excerpt", 100), ("Patterns", 30)]: pdf.cell(col_w, 6, col_name, border=1, fill=True) pdf.ln(); pdf.set_text_color(0, 0, 0) for t in tg_hits[:80]: pdf.set_fill_color(245, 245, 255); pdf.set_font("Helvetica", "", 7) link = _pdf_safe(f"t.me/s/{t.get('channel','')}", 35) text = _pdf_safe(t.get("text", ""), 70) pats = _pdf_safe(", ".join(f"{k}({len(v)})" for k, v in (t.get("patterns") or {}).items()), 25) for val, w in zip([link, text, pats], [50, 100, 30]): pdf.cell(w, 5, val, border=1, fill=True) pdf.ln() pdf.ln(3) if mc_hits: pdf.set_font("Helvetica", "B", 10) pdf.cell(0, 7, _pdf_safe(f"Misconfigurations ({len(mc_hits)})"), ln=True) pdf.set_font("Helvetica", "B", 8) pdf.set_fill_color(30, 30, 30); pdf.set_text_color(255, 255, 255) for col_name, col_w in [("URL", 90), ("Title", 60), ("Dork", 30)]: pdf.cell(col_w, 6, col_name, border=1, fill=True) pdf.ln(); pdf.set_text_color(0, 0, 0) for m in mc_hits[:80]: pdf.set_fill_color(255, 245, 230); pdf.set_font("Helvetica", "", 7) url_m = _pdf_safe(m.get("url", ""), 60) title_m = _pdf_safe(m.get("title", ""), 40) dork_m = _pdf_safe(m.get("dork", ""), 25) for val, w in zip([url_m, title_m, dork_m], [90, 60, 30]): pdf.cell(w, 5, val, border=1, fill=True) pdf.ln() # ── 6. Discovered Assets ───────────────────────────────────── discovered_assets = data.get("discovered_assets", []) or [] if discovered_assets: pdf.add_page() pdf.set_font("Helvetica", "B", 14) pdf.set_text_color(0, 0, 0) pdf.cell(0, 9, _pdf_safe(f"Discovered Assets ({len(discovered_assets)} reinjected identifiers)"), ln=True) pdf.line(15, pdf.get_y(), 195, pdf.get_y()) pdf.ln(3) pdf.set_font("Helvetica", "B", 8) pdf.set_fill_color(30, 30, 30); pdf.set_text_color(255, 255, 255) for col_name, col_w in [("Asset", 65), ("Type", 20), ("Phase", 20), ("Reference (Source/URL/Paste)", 55), ("From", 20)]: pdf.cell(col_w, 6, col_name, border=1, fill=True) pdf.ln(); pdf.set_text_color(0, 0, 0) _phase_fills = {"breach": (255,230,230), "dork": (255,245,220), "scrape": (245,230,255), "hash_crack": (245,230,255)} for da in discovered_assets[:300]: phase = da.get("phase", "?") pdf.set_fill_color(*_phase_fills.get(phase, (245, 245, 245))) pdf.set_font("Helvetica", "", 7) for val, w in zip([ _pdf_safe(da.get("asset", ""), 45), _pdf_safe(da.get("qtype", ""), 12), _pdf_safe(phase, 12), _pdf_safe(da.get("ref", ""), 38), _pdf_safe(da.get("parent", ""), 14), ], [65, 20, 20, 55, 20]): pdf.cell(w, 5, val, border=1, fill=True) pdf.ln() # ── 7. Pivot Tree ───────────────────────────────────────────── pivot_log = data.get("pivot_log", []) or [] if pivot_log: pdf.add_page() pdf.set_font("Helvetica", "B", 14) pdf.set_text_color(0, 0, 0) pdf.cell(0, 9, _pdf_safe(f"Pivot Tree ({len(pivot_log)} nodes)"), ln=True) pdf.line(15, pdf.get_y(), 195, pdf.get_y()) pdf.ln(3) pdf.set_font("Helvetica", "B", 8) pdf.set_fill_color(30, 30, 30); pdf.set_text_color(255, 255, 255) for col_name, col_w in [("D", 8), ("Asset", 55), ("Type", 18), ("Phase", 18), ("Parent", 40), ("Breach", 12), ("Dorks", 12), ("Scrape", 12), ("Cracked", 5)]: pdf.cell(col_w, 6, col_name, border=1, fill=True) pdf.ln(); pdf.set_text_color(0, 0, 0) for e in pivot_log[:300]: pdf.set_fill_color(245, 245, 245); pdf.set_font("Helvetica", "", 7) cracked_str = _pdf_safe(", ".join(e.get("cracked", [])[:2]), 10) for val, w in zip([ str(e.get("depth", 0)), _pdf_safe(e.get("asset", ""), 38), _pdf_safe(e.get("qtype", ""), 12), _pdf_safe(e.get("found_in", ""), 12), _pdf_safe(e.get("parent") or "", 28), str(e.get("records", 0)), str(e.get("dorks", 0)), str(e.get("scrape", 0)), cracked_str, ], [8, 55, 18, 18, 40, 12, 12, 12, 5]): pdf.cell(w, 5, val, border=1, fill=True) pdf.ln() # ── 8. Identity Graph Placeholder ──────────────────────────── pdf.add_page() pdf.set_font("Helvetica", "B", 14) pdf.set_text_color(0, 0, 0) pdf.cell(0, 9, "Identity Relationship Map", ln=True) pdf.line(15, pdf.get_y(), 195, pdf.get_y()) pdf.ln(4) emails = sorted({_rec_get(r, "email") for r in records if _rec_get(r, "email")})[:8] phones = sorted({_rec_get(r, "phone") for r in records if _rec_get(r, "phone")})[:6] usernames = sorted({_rec_get(r, "username") for r in records if _rec_get(r, "username")})[:6] passwords = sorted({_rec_get(r, "password") for r in records if _rec_get(r, "password")})[:5] pdf.set_font("Courier", "", 8) pdf.set_fill_color(245, 255, 245) pdf.rect(15, pdf.get_y(), 180, 120, "F") pdf.set_xy(18, pdf.get_y() + 3) graph_lines = [_pdf_safe(f"[*] TARGET: {target}")] for grp, items, label in [ (emails, emails, "email"), (phones, phones, "phone"), (usernames, usernames, "username"), (passwords, passwords, "password"), ]: if not items: continue graph_lines.append(f" +-- [{label}]") for i, v in enumerate(items): pfx = " | \\--" if i == len(items) - 1 else " | +--" graph_lines.append(_pdf_safe(f"{pfx} {v}", 80)) for line in graph_lines[:30]: pdf.cell(0, 4, line, ln=True) pdf.set_x(18) pdf.output(path) out("ok", f"Forensic PDF saved: {path}") # ======================================================================= # CLI ENTRY POINT # ======================================================================= def main() -> None: initialize_environment() _base = os.path.basename(sys.argv[0]) _prog = os.environ.get("NOX_PROG_NAME") or (f"python3 {_base}" if _base.endswith(".py") else _base) parser = argparse.ArgumentParser( prog=_prog, description=f"NOX v{VERSION} — OSINT Breach Intelligence (120+ JSON plugin sources)", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=( lambda p: f"""Examples: {p} Interactive mode {p} -t user@email.com Scan email {p} -t example.com Scan domain {p} -t example.com --fullscan Full assault + pivot {p} --dork user@email.com Google dorking {p} --scrape user@email.com Web scraping + Telegram {p} --crack Crack a hash {p} --analyze "P@ssw0rd" Password analysis {p} --list-sources List loaded plugins with key status """ )(_prog)) parser.add_argument("-t","--target", help="Target to scan") parser.add_argument("-i","--interactive", action="store_true", help="Interactive mode") parser.add_argument("--version", action="version", version=f"%(prog)s {VERSION}") parser.add_argument("--autoscan", action="store_true", help="Full autoscan: scan+pivot+dork+scrape (no args needed, uses -t)") parser.add_argument("--fullscan", action="store_true", help="Full scan+pivot (alias for --autoscan without dork/scrape)") parser.add_argument("--no-pivot", action="store_true", help="Disable recursive pivot enrichment") parser.add_argument("--depth", type=int, default=None, metavar="N", help="Avalanche pivot depth (default: 2)") parser.add_argument("--dork", metavar="TARGET", help="Google dorking") parser.add_argument("--scrape", metavar="TARGET", help="Web scraping + Telegram indexing") parser.add_argument("--crack", metavar="HASH", help="Crack a hash (WARNING: submits hash to public rainbow-table APIs — use --no-online-crack to disable)") parser.add_argument("--no-online-crack", action="store_true", help="Disable online rainbow-table APIs for hash cracking (local wordlist only, no data sent to third parties)") parser.add_argument("--analyze", metavar="PASS", help="Analyze password") parser.add_argument("--list-sources", action="store_true", help="List loaded plugins with input_type, confidence, key status") parser.add_argument("--tor", action="store_true", help="Enable Tor") parser.add_argument("--proxy", metavar="URL", help="HTTP/S or SOCKS5 proxy URL") parser.add_argument("--allow-leak", action="store_true", help="Bypass fail-safe: allow direct connection if proxy/Tor is unavailable (OPSEC risk)") parser.add_argument("--guardian-off", action="store_true", help="Alias for --allow-leak: disable Guardian OPSEC kill-switch (direct connection)") parser.add_argument("--reset-sources", action="store_true", help="Force resync of all source plugins from package (overwrites user modifications)") parser.add_argument("--threads", type=int, default=20, help="Max concurrency") parser.add_argument("--timeout", type=int, default=15, help="Request timeout") parser.add_argument("-o","--output", metavar="FILE", help="Output file") parser.add_argument("--format", choices=["json","csv","html","md","pdf"], default="json", help="Output format") parser.add_argument("--diff", action="store_true", help="Compare current scan against the last cached scan and highlight new findings only") args = parser.parse_args() config = NoxConfig() # Apply ~/.nox/config.ini settings before CLI args (CLI takes precedence) Config.apply(config) if args.tor: config.use_tor = True config.proxy = f"socks5h://127.0.0.1:{config.tor_socks}" if args.proxy: config.proxy = args.proxy config.allow_leak = args.allow_leak or getattr(args, "guardian_off", False) config.no_online_crack = getattr(args, "no_online_crack", False) config.max_threads = config.concurrency = args.threads config.timeout = args.timeout config.no_pivot = args.no_pivot if getattr(args, "depth", None) is not None: config.pivot_depth = args.depth db = NoxDB() try: _main_run(args, config, db) finally: db.close() def _main_run(args, config: NoxConfig, db: NoxDB) -> None: orc = Orchestrator(config, db) # --list-sources if getattr(args, "list_sources", False): repl = REPL.__new__(REPL) repl.orc = orc repl.db = db repl.config = config repl._sources() return if getattr(args, "reset_sources", False): import shutil as _shutil candidate = _PKG_ROOT / "sources" if not candidate.is_dir(): candidate = Path("/usr/share/nox-cli/sources") if candidate.is_dir(): # Copy all current package sources to runtime dir count = 0 pkg_names = set() for jf in candidate.glob("*.json"): pkg_names.add(jf.name) dst = SOURCE_DIR / jf.name try: _shutil.copy2(jf, dst) count += 1 except OSError: pass # Remove orphaned plugins no longer in the package removed = 0 for existing in SOURCE_DIR.glob("*.json"): if existing.name not in pkg_names: try: existing.unlink() removed += 1 except OSError: pass msg = f"Reset {count} source plugins from package." if removed: msg += f" Removed {removed} orphaned plugin(s)." out("ok", msg) else: out("warn", "Package sources directory not found.") return if args.crack: if getattr(config, "no_online_crack", False): out("warn", "Online rainbow-table APIs disabled (--no-online-crack). Local wordlist only.") result = orc.crack(args.crack) out("info", f"Types: {', '.join(t[0] for t in result.get('types',[]))}") if result.get("plaintext"): out("ok", f"CRACKED: {result['plaintext']} (via {result['method']})") else: out("warn", "Could not crack.") return if args.analyze: repl = REPL.__new__(REPL) repl.orc = orc repl._analyze(args.analyze) return if args.dork: results = orc.dork(args.dork) out("ok", f"Dorking: {len(results)} results") for i, r in enumerate(results[:20], 1): title = (r.get('title','') or r.get('dork',''))[:70] url = r.get("url", "") snippet = r.get("snippet", "")[:100] dork_q = r.get("dork", "")[:60] engine = r.get("engine", "") eng_tag = f" {C.DM}[{engine}]{C.X}" if engine else "" print(f" {C.Y}{i:2}.{C.W} {title}{eng_tag}") if url: print(f" {C.DM}{url[:80]}{C.X}") if snippet: print(f" {C.DM}{snippet}{C.X}") if dork_q and dork_q != title: print(f" {C.DM}dork: {dork_q}{C.X}") if len(results) > 20: print(f" {C.DM} … and {len(results)-20} more — use -o for full export{C.X}") if args.output: data = {"target": args.dork, "records": [], "dork_results": results, "scrape_results": {}} if args.format == "json": Reporter.to_json(data, args.output) elif args.format == "html": Reporter.to_html(data, args.output) elif args.format == "md": Reporter.to_markdown(data, args.output) elif args.format == "pdf": Reporter.to_pdf(data, args.output) elif args.format == "csv": resolved = Reporter._resolve_path(args.output, "csv") import csv as _csv with open(resolved, "w", newline="", encoding="utf-8") as f: w = _csv.DictWriter(f, fieldnames=["url","title","snippet","dork","engine"], extrasaction="ignore") w.writeheader(); w.writerows(results) out("ok", f"Dork CSV saved: {resolved}") return if args.scrape: results = orc.scrape(args.scrape) pastes = results.get('pastes',[]); creds = results.get('credentials',[]) tg = results.get('telegram',[]); mc = results.get('dork_misconfigs',[]) out("ok", f"Pastes: {len(pastes)} | Credentials: {len(creds)} | " f"Hashes: {len(results.get('hashes',[]))} | Telegram: {len(tg)} | Misconfigs: {len(mc)}") _ptmpl = {"Pastebin":"https://pastebin.com/{}","Rentry":"https://rentry.co/{}", "Hastebin":"https://hastebin.com/{}","DPaste":"https://dpaste.org/{}"} for p in pastes[:8]: pid = p.get("id",""); site = p.get("site","") url = _ptmpl.get(site,"").format(pid) if _ptmpl.get(site) and pid else "" pats = ", ".join(f"{k}({len(v)})" for k,v in (p.get("patterns") or {}).items()) print(f" {C.P}[paste]{C.W} [{site}] {(p.get('title') or pid)[:50]} {C.DM}{pats}{C.X}") if url: print(f" {C.DM}{url}{C.X}") if len(pastes) > 8: print(f" {C.DM} … and {len(pastes)-8} more pastes{C.X}") for c in creds[:12]: src = c.get("source",""); pid = c.get("paste_id","") ref = f"[{src or pid}]" if (src or pid) else "" print(f" {C.R}[cred]{C.W} {c.get('raw','')[:80]} {C.DM}{ref}{C.X}") if len(creds) > 12: print(f" {C.DM} … and {len(creds)-12} more credentials{C.X}") for t in tg[:5]: pats = ", ".join(f"{k}({len(v)})" for k,v in (t.get("patterns") or {}).items()) print(f" {C.CY}[tg]{C.W} [{t.get('channel','')}] {t.get('text','')[:70]} {C.DM}{pats}{C.X}") if len(tg) > 5: print(f" {C.DM} … and {len(tg)-5} more telegram hits{C.X}") for m in mc[:5]: print(f" {C.O}[misc]{C.W} {m.get('title','')[:60]}") if m.get("url"): print(f" {C.DM}{m['url'][:80]}{C.X}") if m.get("dork"): print(f" {C.DM}dork: {m['dork'][:60]}{C.X}") if len(mc) > 5: print(f" {C.DM} … and {len(mc)-5} more misconfigs{C.X}") if args.output: data = {"target": args.scrape, "records": [], "dork_results": [], "scrape_results": results} if args.format == "json": Reporter.to_json(data, args.output) elif args.format == "html": Reporter.to_html(data, args.output) elif args.format == "md": Reporter.to_markdown(data, args.output) elif args.format == "pdf": Reporter.to_pdf(data, args.output) elif args.format == "csv": REPL._export_csv_extras(data, Reporter._resolve_path(args.output, "csv")) return if args.target: if args.autoscan or args.fullscan: try: result = asyncio.run(orc.fullscan(args.target, pivot=not args.no_pivot)) except KeyboardInterrupt: print() out("warn", "Scan interrupted.") sys.exit(0) records = result.get("records",[]) else: records = orc.scan(args.target) HVTAnalyzer.annotate(records) result = { "target": args.target, "records": records, "analysis": CredAnalyzer.analyze(records), "hvt_records": HVTAnalyzer.filter_hvt(records), "dork_results": [], "scrape_results": {}, "pivot_chain": [args.target], "pivot_log": [], "discovered_assets": [], "scan_meta": {"pivot_depth": 0, "nodes_discovered": len(records)}, } analysis = result.get("analysis") or CredAnalyzer.analyze(records) # ── --diff: surface only new findings vs last cached scan ── if getattr(args, "diff", False): try: prev_rows = db.get_creds(args.target) prev_keys = { hashlib.sha256( f"{r.get('email','') or r.get('username','')}:{r.get('password','')}".encode() ).hexdigest() for r in prev_rows } new_records = [ r for r in records if hashlib.sha256( f"{r.email or r.username}:{r.password}".encode() ).hexdigest() not in prev_keys ] out("info", f"--diff: {len(new_records)} new findings vs last cached scan ({len(records) - len(new_records)} already known)") records = new_records result["records"] = new_records except Exception as _de: out("warn", f"--diff failed, showing full results: {_de}") repl = REPL.__new__(REPL) repl.orc = orc repl.db = db repl.config = config repl._last_full = result repl._last = records repl._print_summary(analysis) if args.autoscan or args.fullscan: dorks = result.get("dork_results",[]) if dorks: out("info", f"Dorking Results: {len(dorks)}") for d in dorks[:10]: title = (d.get('title','') or d.get('dork',''))[:70] print(f" {C.Y}→{C.W} {title}") if d.get("url"): print(f" {C.DM}{d['url'][:80]}{C.X}") if len(dorks) > 10: print(f" {C.DM} … and {len(dorks)-10} more — use -o for full export{C.X}") scrape = result.get("scrape_results",{}) creds = scrape.get("credentials",[]) if creds: out("info", f"Scraped Credentials: {len(creds)}") for c in creds[:10]: print(f" {C.R}→{C.W} {c.get('raw','')}") if len(creds) > 10: print(f" {C.DM} … and {len(creds)-10} more{C.X}") tg = scrape.get("telegram",[]) if tg: out("info", f"Telegram Hits: {len(tg)}") for t in tg[:5]: print(f" {C.CY}→{C.W} [{t.get('channel','')}] {t.get('text','')[:80]}") if len(tg) > 5: print(f" {C.DM} … and {len(tg)-5} more{C.X}") mc = scrape.get("dork_misconfigs",[]) if mc: out("info", f"Misconfigurations: {len(mc)}") for m in mc[:5]: print(f" {C.O}→{C.W} {m.get('title','')[:70]}") if len(mc) > 5: print(f" {C.DM} … and {len(mc)-5} more{C.X}") da = result.get("discovered_assets", []) if da: out("info", f"Reinjected Assets: {len(da)}") _pc = {"breach": C.R, "dork": C.O, "scrape": C.P, "hash_crack": C.P} for d in da[:15]: pc = _pc.get(d.get("phase",""), C.DM) print(f" {pc}[{d.get('phase','?')}]{C.W} {d.get('asset','')} " f"{C.DM}({d.get('qtype','')}) ← {d.get('ref','')[:60]}{C.X}") if len(da) > 15: print(f" {C.DM} … and {len(da)-15} more — use -o for full export{C.X}") if args.output: if args.format == "json": Reporter.to_json(result, args.output) elif args.format == "csv": Reporter.to_csv(records, args.output) REPL._export_csv_extras(result, Reporter._resolve_path(args.output, "csv")) elif args.format == "html": Reporter.to_html(result, args.output) elif args.format == "md": Reporter.to_markdown(result, args.output) elif args.format == "pdf": Reporter.to_pdf(result, args.output) return # Interactive mode repl = REPL() repl.orc = orc repl.config = config repl.db = db repl.run() if __name__ == "__main__": try: main() except KeyboardInterrupt: print() out("warn", "Interrupted.") sys.exit(0)

🔍 Discovered Public Documents & Metadata

🔎 Dork Results ({len(dork_results)} hits)

📋 Scrape Results

Pastes ({len(scrape_results.get("pastes",[]))})

Extracted Credentials ({len(scrape_results.get("credentials",[]))})

Telegram CTI ({len(scrape_results.get("telegram",[]))})

Misconfigurations ({len(scrape_results.get("dork_misconfigs",[]))})

🔄 Pivot Tree ({len(pivot_log)} nodes)

🔎 Discovered Assets ({len(discovered_assets)} new identifiers reinjected)

[ NOX Framework ]

📋 Executive Summary

Risk Heatmap

Password Patterns

⚠ High-Value Targets ({s["hvt_count"]})

🚨 Top Threats

Credential Records (top 500)