mirror of
https://github.com/nox-project/nox-framework.git
synced 2026-06-08 16:07:17 +00:00
NOX Framework v1.0.0
This commit is contained in:
@@ -0,0 +1,243 @@
|
||||
"""
|
||||
sources/helpers/config_handler.py — NOX Framework
|
||||
Unified credential management via ~/.config/nox-cli/apikeys.json (XDG).
|
||||
|
||||
Priority: environment variable → apikeys.json → None
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Dict, Optional
|
||||
|
||||
# ── Shared constant — import this everywhere instead of a raw string ───
|
||||
UNIVERSAL_PLACEHOLDER = "INSERT_API_KEY_HERE"
|
||||
|
||||
# ── XDG config path ────────────────────────────────────────────────────
|
||||
_CONFIG_DIR = Path(os.environ.get("XDG_CONFIG_HOME", Path.home() / ".config")) / "nox-cli"
|
||||
_APIKEYS_FILE = _CONFIG_DIR / "apikeys.json"
|
||||
|
||||
# ── Complete service registry ──────────────────────────────────────────
|
||||
# Format: key_name → {"display": str, "public": bool}
|
||||
# public=True → no key needed, always active
|
||||
# public=False → requires a real API key (goes into apikeys.json)
|
||||
SERVICE_REGISTRY: Dict[str, Dict] = {
|
||||
# ── Public / keyless ──────────────────────────────────────────────
|
||||
"alienvault_otx_domain": {"display": "AlienVault OTX (Domain)", "public": True},
|
||||
"alienvault_otx_ip": {"display": "AlienVault OTX (IP)", "public": True},
|
||||
"alienvault_otx_malware": {"display": "AlienVault OTX (Malware)", "public": True},
|
||||
"alienvault_otx_user": {"display": "AlienVault OTX (User)", "public": True},
|
||||
"anubis_subdomains": {"display": "Anubis Subdomains", "public": True},
|
||||
"bgpview_ip": {"display": "BGPView IP", "public": True},
|
||||
"checkleaked": {"display": "CheckLeaked", "public": True},
|
||||
"crt_sh": {"display": "crt.sh", "public": True},
|
||||
"cve_search": {"display": "CVE Search", "public": True},
|
||||
"cxsecurity": {"display": "CXSecurity", "public": True},
|
||||
"duckduckgo_api": {"display": "Google / DDG Dorks", "public": True},
|
||||
"emailrep_io": {"display": "EmailRep.io", "public": True},
|
||||
"github_users": {"display": "GitHub Users", "public": True},
|
||||
"gitlab_search": {"display": "GitLab Search", "public": True},
|
||||
"gravatar": {"display": "Gravatar", "public": True},
|
||||
"hackernews_user": {"display": "HackerNews User", "public": True},
|
||||
"hackertarget_dnslookup": {"display": "HackerTarget DNS Lookup", "public": True},
|
||||
"hackertarget_hostsearch": {"display": "HackerTarget Host Search", "public": True},
|
||||
"hackertarget_reverseip": {"display": "HackerTarget Reverse IP", "public": True},
|
||||
"hackertarget_whois": {"display": "WHOIS (HackerTarget)", "public": True},
|
||||
"hudsonrock_osint": {"display": "HudsonRock OSINT", "public": True},
|
||||
"ipapi_co": {"display": "ipapi.co", "public": True},
|
||||
"ipinfo_io": {"display": "IPInfo.io", "public": True},
|
||||
"ipvigilante": {"display": "IPVigilante", "public": True},
|
||||
"keybase_lookup": {"display": "Keybase Lookup", "public": True},
|
||||
"keybase_proofs": {"display": "Keybase Proofs", "public": True},
|
||||
"maltiverse_ip": {"display": "Maltiverse IP", "public": True},
|
||||
"npm_user": {"display": "NPM User", "public": True},
|
||||
"packetstorm": {"display": "PacketStorm", "public": True},
|
||||
"phishtank_check": {"display": "PhishTank", "public": True},
|
||||
"pulsedive": {"display": "Pulsedive (Free)", "public": True},
|
||||
"pypi_user": {"display": "PyPI User", "public": True},
|
||||
"reddit_user": {"display": "Reddit User", "public": True},
|
||||
"robtex_ip": {"display": "Robtex IP", "public": True},
|
||||
"scamwatcher": {"display": "ScamWatcher", "public": True},
|
||||
"social_scan": {"display": "Social Scan", "public": True},
|
||||
"sublist3r_api": {"display": "Sublist3r API", "public": True},
|
||||
"threatcrowd_domain": {"display": "ThreatCrowd (Domain)", "public": True},
|
||||
"threatcrowd_email": {"display": "ThreatCrowd (Email)", "public": True},
|
||||
"threatminer_domain": {"display": "ThreatMiner (Domain)", "public": True},
|
||||
"threatminer_ip": {"display": "ThreatMiner (IP)", "public": True},
|
||||
"urlscan_search": {"display": "URLScan.io", "public": True},
|
||||
"vigilante_pw": {"display": "Vigilante.pw", "public": True},
|
||||
"wayback_machine": {"display": "Wayback Machine", "public": True},
|
||||
# ── Private / key-required ────────────────────────────────────────
|
||||
"ABSTRACT_API_KEY": {"display": "Abstract Email Validation", "public": False},
|
||||
"ABUSEIPDB_API_KEY": {"display": "AbuseIPDB", "public": False},
|
||||
"ANYRUN_API_KEY": {"display": "Any.run", "public": False},
|
||||
"BA_API_KEY": {"display": "BreachAware", "public": False},
|
||||
"BD_API_KEY": {"display": "BreachDirectory", "public": False},
|
||||
"BINARYEDGE_API_KEY": {"display": "BinaryEdge", "public": False},
|
||||
"BING_API_KEY": {"display": "Bing Search API", "public": False},
|
||||
"CENSYS_AUTH_BASE64": {"display": "Censys", "public": False},
|
||||
"CIRCL_AUTH_BASE64": {"display": "CIRCL.lu PDNS", "public": False},
|
||||
"CIT0DAY_API_KEY": {"display": "Cit0day", "public": False},
|
||||
"CLEARBIT_API_KEY": {"display": "Clearbit Enrich", "public": False},
|
||||
"CRIMINALIP_API_KEY": {"display": "CriminalIP", "public": False},
|
||||
"DEHASHED_AUTH_BASE64": {"display": "Dehashed", "public": False},
|
||||
"DNSDB_API_KEY": {"display": "DNSDB Passive DNS", "public": False},
|
||||
"DT_AUTH_BASE64": {"display": "DomainTools WHOIS", "public": False},
|
||||
"EXTREME_API_KEY": {"display": "Extreme IP Lookup", "public": False},
|
||||
"FLP_API_KEY": {"display": "FraudLabsPro", "public": False},
|
||||
"FOFA_API_KEY": {"display": "FOFA", "public": False},
|
||||
"FOFA_EMAIL": {"display": "FOFA (account email)", "public": False},
|
||||
"FULLCONTACT_API_KEY": {"display": "FullContact", "public": False},
|
||||
"GITHUB_TOKEN": {"display": "GitHub (Code/Repo Search)", "public": False},
|
||||
"GOOGLE_API_KEY": {"display": "Google Safe Browsing", "public": False},
|
||||
"GOOGLE_CX_KEY": {"display": "Google Custom Search (API key)", "public": False},
|
||||
"GOOGLE_CX_ID": {"display": "Google Custom Search (CX ID)", "public": False},
|
||||
"GREYNOISE_API_KEY": {"display": "GreyNoise", "public": False},
|
||||
"HASHES_API_KEY": {"display": "Hashes.org", "public": False},
|
||||
"HIBP_API_KEY": {"display": "HaveIBeenPwned", "public": False},
|
||||
"HIPPO_API_KEY": {"display": "EmailHippo", "public": False},
|
||||
"HUNTER_API_KEY": {"display": "Hunter.io", "public": False},
|
||||
"HYBRID_API_KEY": {"display": "Hybrid Analysis", "public": False},
|
||||
"INTELX_API_KEY": {"display": "IntelX", "public": False},
|
||||
"INTEZER_API_KEY": {"display": "Intezer", "public": False},
|
||||
"IPDATA_API_KEY": {"display": "IPData.co", "public": False},
|
||||
"IPGEO_API_KEY": {"display": "IPGeolocation.io", "public": False},
|
||||
"IPINFODB_API_KEY": {"display": "IPInfoDB", "public": False},
|
||||
"IPQS_API_KEY": {"display": "IPQualityScore", "public": False},
|
||||
"IPSTACK_API_KEY": {"display": "IPStack", "public": False},
|
||||
"JOE_API_KEY": {"display": "Joe Sandbox", "public": False},
|
||||
"LEAKCHECK_API_KEY": {"display": "LeakCheck", "public": False},
|
||||
"LEAKIX_API_KEY": {"display": "LeakIX", "public": False},
|
||||
"LEAKSTATS_API_KEY": {"display": "LeakStats.pw", "public": False},
|
||||
"MAILBOX_API_KEY": {"display": "Mailboxlayer", "public": False},
|
||||
"MALSHARE_API_KEY": {"display": "MalShare", "public": False},
|
||||
"METADEFENDER_API_KEY": {"display": "MetaDefender", "public": False},
|
||||
"MISP_API_KEY": {"display": "MISP", "public": False},
|
||||
"NUMVERIFY_API_KEY": {"display": "Numverify", "public": False},
|
||||
"ONYPHE_API_KEY": {"display": "Onyphe", "public": False},
|
||||
"PASSIVETOTAL_AUTH_BASE64": {"display": "PassiveTotal / RiskIQ", "public": False},
|
||||
"PIPL_API_KEY": {"display": "Pipl", "public": False},
|
||||
"PULSEDIVE_API_KEY": {"display": "Pulsedive (Premium)", "public": False},
|
||||
"RF_TOKEN": {"display": "Recorded Future", "public": False},
|
||||
"SECURITYTRAILS_API_KEY": {"display": "SecurityTrails", "public": False},
|
||||
"SHODAN_API_KEY": {"display": "Shodan", "public": False},
|
||||
"SNUSBASE_API_KEY": {"display": "Snusbase", "public": False},
|
||||
"SPYCLOUD_API_KEY": {"display": "SpyCloud", "public": False},
|
||||
"SPYONWEB_API_KEY": {"display": "SpyOnWeb", "public": False},
|
||||
"SPYSE_API_KEY": {"display": "Spyse", "public": False},
|
||||
"TC_API_KEY": {"display": "ThreatConnect", "public": False},
|
||||
"TINES_API_KEY": {"display": "Tines Breach", "public": False},
|
||||
"TP_API_KEY": {"display": "ThreatPortal", "public": False},
|
||||
"TWITTER_BEARER_TOKEN": {"display": "Twitter / X API v2", "public": False},
|
||||
"URLVOID_API_KEY": {"display": "URLVoid", "public": False},
|
||||
"VIEWDNS_API_KEY": {"display": "ViewDNS", "public": False},
|
||||
"VIRUSTOTAL_API_KEY": {"display": "VirusTotal", "public": False},
|
||||
"VULNERS_API_KEY": {"display": "Vulners", "public": False},
|
||||
"WF_API_KEY": {"display": "WhoisFreaks", "public": False},
|
||||
"WHOISXML_API_KEY": {"display": "WhoisXML API", "public": False},
|
||||
"WHOXY_API_KEY": {"display": "Whoxy WHOIS", "public": False},
|
||||
"ZEROBOUNCE_API_KEY": {"display": "ZeroBounce", "public": False},
|
||||
"ZOOMEYE_API_KEY": {"display": "ZoomEye", "public": False},
|
||||
}
|
||||
|
||||
_PRIVATE_KEYS = {k: v for k, v in SERVICE_REGISTRY.items() if not v["public"]}
|
||||
|
||||
|
||||
# ── Store helpers ──────────────────────────────────────────────────────
|
||||
|
||||
def _default_store() -> Dict[str, str]:
    """Build a fresh store mapping every private service key to the placeholder value."""
    return dict.fromkeys(_PRIVATE_KEYS, UNIVERSAL_PLACEHOLDER)
|
||||
|
||||
|
||||
def _write_store(data: Dict[str, str]) -> None:
    """Atomically write *data* to apikeys.json, readable by the owner only.

    The payload is written to a sibling temp file and then moved over the
    real file, so readers never observe a half-written store.

    Args:
        data: Mapping of key name to value, serialized as sorted, indented JSON.

    Raises:
        RuntimeError: If the config directory or file cannot be written
            because of insufficient permissions.
    """
    payload = json.dumps(data, indent=4, sort_keys=True)
    tmp = _APIKEYS_FILE.with_suffix(".tmp")
    try:
        _CONFIG_DIR.mkdir(mode=0o700, parents=True, exist_ok=True)
        _CONFIG_DIR.chmod(0o700)
        # Create the temp file with 0600 from the start: the secrets must never
        # sit on disk with umask-default (possibly world-readable) permissions.
        fd = os.open(tmp, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            handle.write(payload)
        # A stale temp file left by an earlier crash keeps its old mode when
        # re-opened, so tighten it explicitly before it becomes the real file.
        tmp.chmod(0o600)
        tmp.replace(_APIKEYS_FILE)
    except PermissionError as exc:
        raise RuntimeError(f"[config_handler] Cannot write {_APIKEYS_FILE}: {exc}") from exc
|
||||
|
||||
|
||||
def _load_store() -> Dict[str, str]:
    """Load apikeys.json, creating it with defaults when it does not exist.

    A file that is empty, is not valid UTF-8, is not valid JSON, or whose root
    is not a JSON object is moved aside to ``apikeys.json.bak`` and replaced
    with a default store. Keys missing from an existing store are back-filled
    with the placeholder and persisted.

    Returns:
        The key store as a dict of key name to value.

    Raises:
        RuntimeError: If the file cannot be read (or rewritten) because of
            insufficient permissions.
    """
    _CONFIG_DIR.mkdir(mode=0o700, parents=True, exist_ok=True)
    _CONFIG_DIR.chmod(0o700)

    if not _APIKEYS_FILE.exists():
        print(" \033[92m[+]\033[0m Initializing NOX Environment in ~/.config/nox-cli/")
        defaults = _default_store()
        _write_store(defaults)
        return defaults

    try:
        text = _APIKEYS_FILE.read_text(encoding="utf-8").strip()
        if not text:
            raise json.JSONDecodeError("Empty file", "", 0)
        data = json.loads(text)
        if not isinstance(data, dict):
            raise json.JSONDecodeError("Root is not a JSON object", text, 0)
    except PermissionError as exc:
        raise RuntimeError(f"[config_handler] Cannot read {_APIKEYS_FILE}: {exc}") from exc
    except (json.JSONDecodeError, UnicodeDecodeError):
        # UnicodeDecodeError covers files with invalid UTF-8 bytes, which are
        # just as corrupt as malformed JSON and must take the same reset path.
        bak = _APIKEYS_FILE.with_suffix(".json.bak")
        # replace() overwrites a backup left by an earlier reset on every
        # platform; rename() raises on Windows when the target exists.
        _APIKEYS_FILE.replace(bak)
        print(f"[!] Malformed apikeys.json detected — backed up to {bak.name} and reset to defaults.")
        defaults = _default_store()
        _write_store(defaults)
        return defaults

    # Back-fill keys added in newer versions
    new_keys = {k: UNIVERSAL_PLACEHOLDER for k in _PRIVATE_KEYS if k not in data}
    if new_keys:
        data.update(new_keys)
        _write_store(data)
    return data
|
||||
|
||||
|
||||
# ── ConfigManager ──────────────────────────────────────────────────────
|
||||
|
||||
class ConfigManager:
    """
    Unified API key manager.

    Resolution order per key:
      1. Environment variable (exact key name)
      2. ~/.config/nox-cli/apikeys.json
      3. Returns None if value equals UNIVERSAL_PLACEHOLDER or is absent

    All state is held on the class, so every caller shares one store and one
    resolution cache.
    """

    # Resolved values by key name; None records "missing or placeholder".
    _cache: Dict[str, Optional[str]] = {}
    # Parsed apikeys.json, loaded on first use.
    _store: Optional[Dict[str, str]] = None

    @classmethod
    def _get_store(cls) -> Dict[str, str]:
        """Return the key store, loading it from disk on first access."""
        if cls._store is None:
            cls._store = _load_store()
        return cls._store

    @classmethod
    def get_key(cls, key_name: str) -> Optional[str]:
        """Return the configured value, or None if missing/placeholder.

        The environment variable named *key_name* wins over the stored value.
        The result is cached until `set` is called for the same key.
        """
        if key_name in cls._cache:
            return cls._cache[key_name]
        val = os.environ.get(key_name, "") or cls._get_store().get(key_name, "")
        result = None if (not val or val == UNIVERSAL_PLACEHOLDER) else val
        cls._cache[key_name] = result
        return result

    # Backward-compatible alias used by nox.py internals
    get = get_key

    @classmethod
    def set(cls, key_name: str, value: str) -> None:
        """Persist a key to apikeys.json and invalidate its cached resolution.

        Raises:
            RuntimeError: Propagated from the store writer when the file
                cannot be written.
        """
        store = cls._get_store()
        store[key_name] = value
        _write_store(store)
        # Drop the cached entry instead of writing `value` into it: the next
        # get_key() then re-applies the full resolution rules (environment
        # variable first, empty/placeholder -> None) rather than returning a
        # value that bypassed them.
        cls._cache.pop(key_name, None)

    @classmethod
    def config_path(cls) -> Path:
        """Return the path of the apikeys.json file backing this manager."""
        return _APIKEYS_FILE
|
||||
@@ -0,0 +1,119 @@
|
||||
"""
|
||||
sources/helpers/cracker.py
|
||||
Resilient async hash cracker for NOX autoscan.
|
||||
|
||||
Detects MD5 / SHA1 / SHA256 / bcrypt hashes inside breach records,
|
||||
fires background crack attempts against available APIs, and returns
|
||||
results without ever blocking the main pivot pipeline.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import re
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
# C2: MD5 and NTLM share the same 32-char hex pattern.
|
||||
# We list md5 first (most common in breach data) but also accept ntlm
|
||||
# so callers can query NTLM-specific APIs when needed.
|
||||
_PATTERNS: List[Tuple[str, re.Pattern]] = [
|
||||
("bcrypt", re.compile(r"^\$2[aby]?\$\d{2}\$.{53}$")),
|
||||
("sha256", re.compile(r"^[a-f0-9]{64}$", re.I)),
|
||||
("sha1", re.compile(r"^[a-f0-9]{40}$", re.I)),
|
||||
("md5", re.compile(r"^[a-f0-9]{32}$", re.I)),
|
||||
# ntlm shares the 32-char hex pattern — detected as md5 first,
|
||||
# but async_crack queries both md5 and ntlm APIs for 32-char hashes.
|
||||
]
|
||||
|
||||
# Writes to ~/.config/nox-cli/logs/nox_system.log — never to terminal
|
||||
_syslog = logging.getLogger("nox.system")
|
||||
|
||||
# Per-API timeout — each individual rainbow-table query budget
|
||||
_API_TIMEOUT = 8
|
||||
# Global crack budget — hard cap regardless of API count or response order
|
||||
CRACK_TIMEOUT = 20
|
||||
|
||||
|
||||
def detect_hash(value: str) -> Optional[str]:
    """Classify *value* as a hash: name of the first matching pattern in _PATTERNS, or None."""
    candidate = value.strip()
    return next((name for name, rx in _PATTERNS if rx.match(candidate)), None)
|
||||
|
||||
|
||||
async def _query_api(session, url: str, fmt: str) -> Optional[str]:
    """Single API query — returns plaintext or None. Never raises.

    Args:
        session: HTTP client session exposing an aiohttp-style ``get()``.
        url: Fully built lookup URL.
        fmt: ``"text"`` to treat the body as the plaintext itself; any other
            value parses the body as JSON.

    Returns:
        The recovered plaintext, or None on a non-200 status, an empty,
        oversized or error-looking body, a JSON payload without a usable
        string field, or any request/parse failure.
    """
    # Leading markers of "no hit" / error bodies returned with HTTP 200.
    error_prefixes = (
        "not found", "error", "invalid", "no result", "not in",
        "cmd5-error", "not exist", "code erreur", "erreur", "unknown",
    )
    try:
        import aiohttp

        budget = aiohttp.ClientTimeout(total=_API_TIMEOUT)
        async with session.get(url, timeout=budget) as resp:
            if resp.status != 200:
                return None
            if fmt == "text":
                text = (await resp.text()).strip()
                # Reject empty, too-long, or obvious error responses
                if not text or len(text) > 128:
                    return None
                if text.lower().startswith(error_prefixes):
                    return None
                return text
            data = await resp.json(content_type=None)
            if not isinstance(data, dict):
                return None
            # Only a non-empty string counts as a plaintext: nested objects,
            # booleans or numbers under these keys are not cracked passwords
            # and must not leak through the Optional[str] contract.
            for field in ("result", "plaintext", "plain"):
                value = data.get(field)
                if isinstance(value, str) and value:
                    return value
            return None
    except Exception:
        # Deliberate best-effort: one failing lookup service must not abort
        # the crack attempt. Task cancellation is not an Exception subclass
        # on Python 3.8+ and still propagates.
        return None
|
||||
|
||||
|
||||
async def async_crack(session, hash_value: str, hash_type: str) -> Optional[str]:
    """
    Query multiple rainbow-table APIs concurrently.

    Returns the first plaintext found, or None. bcrypt is skipped.

    C2: for 32-char hex (md5/ntlm ambiguity), also query an NTLM-specific API.

    Each lookup carries its own per-API timeout inside the query helper; the
    whole fan-out is additionally capped by the global CRACK_TIMEOUT budget.
    All outstanding lookups are cancelled as soon as a result is found, the
    budget runs out, or this coroutine itself is cancelled.

    Args:
        session: HTTP client session handed to every lookup.
        hash_value: The hash to look up (stripped and lower-cased before use).
        hash_type: Hash family name; ``"bcrypt"`` short-circuits to None.
    """
    if hash_type == "bcrypt":
        return None

    h = hash_value.strip().lower()
    apis = [
        (f"https://www.nitrxgen.net/md5db/{h}", "text"),
        (f"https://hashes.com/en/api/hash?hash={h}", "json"),
        (f"https://hash.help/api/lookup/{h}", "json"),
        (f"https://hashkiller.io/api/search.php?hash={h}", "json"),
        (f"https://md5decrypt.net/Api/api.php?hash={h}&hash_type={hash_type}&email=&code=", "text"),
        (f"https://www.cmd5.org/api.ashx?hash={h}", "text"),
    ]
    # C2: for 32-char hashes (md5/ntlm ambiguous), add NTLM-specific endpoint
    if hash_type == "md5" and len(h) == 32:
        apis.append((f"https://hashes.com/en/api/hash?hash={h}&type=ntlm", "json"))

    # Real tasks (not bare coroutines) so the losers can be cancelled below.
    tasks = [asyncio.create_task(_query_api(session, url, fmt)) for url, fmt in apis]
    try:
        # as_completed enforces the global budget itself: once CRACK_TIMEOUT
        # elapses, awaiting the next item raises asyncio.TimeoutError.
        for next_done in asyncio.as_completed(tasks, timeout=CRACK_TIMEOUT):
            try:
                plaintext = await next_done
            except asyncio.TimeoutError:
                break
            except asyncio.CancelledError:
                # Never swallow cancellation: the caller must be able to
                # abort this coroutine (e.g. from its own wait_for).
                raise
            except Exception:
                continue
            if plaintext:
                return plaintext
        return None
    finally:
        # Cancel whatever is still running and await it so no task is left
        # pending (avoids "Task was destroyed but it is pending" warnings).
        for task in tasks:
            if not task.done():
                task.cancel()
        await asyncio.gather(*tasks, return_exceptions=True)
|
||||
@@ -0,0 +1,658 @@
|
||||
"""
|
||||
sources/helpers/reporting.py
|
||||
NOX Enterprise Reporting — Executive Summary, Pivot Chain, Data Sanitization.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import html as _html
|
||||
import json
|
||||
import re
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
|
||||
# ── Noise patterns stripped from all report output ────────────────────
|
||||
_NOISE_RE = re.compile(
|
||||
r"(Traceback \(most recent|File \".*\.py\"|TimeoutError|ProxyError"
|
||||
r"|ConnectionError|aiohttp\.|ClientConnector|ssl\.|asyncio\."
|
||||
r"|Task exception|NoneType|Object of type)",
|
||||
re.I,
|
||||
)
|
||||
_CTRL_RE = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x9f]")
|
||||
|
||||
|
||||
def _nox_ver() -> str:
|
||||
try:
|
||||
from nox import VERSION # type: ignore
|
||||
return VERSION
|
||||
except ImportError:
|
||||
return "1.0.0"
|
||||
|
||||
|
||||
def _clean(v: Any, maxlen: int = 200) -> str:
    """Sanitize a value for HTML output.

    Control characters are removed; a value matching the noise pattern is
    discarded entirely (empty string); otherwise the text is cut to *maxlen*
    characters and then HTML-escaped. None is treated as an empty string.
    """
    text = "" if v is None else str(v)
    text = _CTRL_RE.sub("", text)
    if _NOISE_RE.search(text) is not None:
        return ""
    truncated = text[:maxlen]
    return _html.escape(truncated)
|
||||
|
||||
|
||||
def _raw(v: Any, maxlen: int = 200) -> str:
    """Sanitize a value for PDF / plain-text output — no HTML escaping.

    Control characters are removed; a value matching the noise pattern is
    discarded entirely (empty string); otherwise the text is cut to *maxlen*
    characters. None is treated as an empty string.
    """
    text = "" if v is None else str(v)
    text = _CTRL_RE.sub("", text)
    if _NOISE_RE.search(text) is not None:
        return ""
    return text[:maxlen]
|
||||
|
||||
|
||||
def _pdf_safe(s: str, maxlen: int = 180) -> str:
    """Sanitize *s* and reduce it to plain ASCII for PDF output.

    D4: the text is first sanitized and truncated to *maxlen*, then NFKD
    normalization splits accented characters into base letter + combining
    mark (é → e + U+0301). The combining marks are dropped so the base letter
    survives on its own. Characters with no ASCII decomposition (Cyrillic,
    CJK, etc.) become '?' — intentional, so the output never contains a
    character outside the ASCII range.
    """
    import unicodedata

    text = _raw(s, maxlen)
    decomposed = unicodedata.normalize("NFKD", text)
    # Remove the combining marks BEFORE encoding; otherwise each accent is
    # itself replaced by '?' and "é" renders as "e?" instead of "e".
    base = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    return base.encode("ascii", errors="replace").decode("ascii")
|
||||
|
||||
|
||||
def _rget(r: Any, k: str) -> str:
|
||||
if isinstance(r, dict):
|
||||
return str(r.get(k, "") or "")
|
||||
return str(getattr(r, k, "") or "")
|
||||
|
||||
|
||||
# ── Executive summary builder ─────────────────────────────────────────
|
||||
|
||||
def build_exec_summary(data: dict) -> dict:
    """
    Returns a dict with all dashboard KPIs needed by every format.

    Expects data keys: records, analysis, scan_meta (optional); pivot_chain is
    used as a fallback for the pivot depth. Records may be dicts or objects.

    Returned keys: total_records, nodes_discovered, cleartext_passwords,
    pivot_depth, elapsed, buckets (severity name -> count), hvt_count.
    """
    # `or` guards treat an explicit None the same as a missing key.
    records = data.get("records") or []
    meta = data.get("scan_meta") or {}
    analysis = data.get("analysis") or {}

    cleartext = sum(1 for r in records if _rget(r, "password"))
    nodes = len({_rget(r, "email") or _rget(r, "username") for r in records} - {""})
    elapsed = meta.get("elapsed_seconds")
    depth = meta.get("pivot_depth", len(data.get("pivot_chain") or []))

    def _score(record: Any) -> float:
        """Numeric risk score of a record; missing or non-numeric counts as 0."""
        try:
            return float(_rget(record, "risk_score") or 0)
        except ValueError:
            return 0.0

    def _is_hvt(record: Any) -> bool:
        """HVT flag for dict and object records alike."""
        if isinstance(record, dict):
            return bool(record.get("is_hvt"))
        return bool(getattr(record, "is_hvt", False))

    buckets: Dict[str, int] = {"Critical": 0, "High": 0, "Medium": 0, "Low": 0, "Info": 0}
    for r in records:
        rs = _score(r)
        if rs >= 90:
            buckets["Critical"] += 1
        elif rs >= 70:
            buckets["High"] += 1
        elif rs >= 40:
            buckets["Medium"] += 1
        elif rs >= 10:
            buckets["Low"] += 1
        else:
            buckets["Info"] += 1

    return {
        "total_records": len(records),
        "nodes_discovered": nodes,
        "cleartext_passwords": cleartext,
        "pivot_depth": depth,
        "elapsed": f"{elapsed:.1f}s" if elapsed is not None else "N/A",
        "buckets": buckets,
        # A count supplied by the analysis stage wins; otherwise count flags.
        "hvt_count": analysis.get("hvt_count", sum(1 for r in records if _is_hvt(r))),
    }
|
||||
|
||||
|
||||
# ── Pivot chain renderer ──────────────────────────────────────────────
|
||||
|
||||
def render_pivot_chain(data: dict) -> List[str]:
    """
    Build a human-readable pivot chain, one output line per node.

    Sources are tried in this order:
      1. pivot_log (D2) — one line per logged entry, indented by its depth.
      2. pivot_chain with more than one node — seed line plus one line per pivot.
      3. Best-effort reconstruction from the first 40 records and the first
         5 dork results, skipping identities/URLs already listed.
    """
    seed = _raw(data.get("target", "?"))
    placeholder = [f"[SEED] {seed} (no pivot data)"]

    # D2: a pivot log, when present, is rendered as-is
    entries = data.get("pivot_log") or []
    if entries:
        rendered: List[str] = []
        for entry in entries:
            level = entry.get("depth", 0)
            asset = _raw(entry.get("asset", ""))
            phase = _raw(entry.get("found_in", entry.get("source", "?")))
            parent = _raw(entry.get("parent") or "")
            indent = " " * level
            rendered.append(
                f"[SEED] {asset}" if level == 0
                else f"{indent}└─ [{phase}] {asset} ← {parent}"
            )
        return rendered or placeholder

    # Ordered pivot chain from AvalancheScanner
    chain = data.get("pivot_chain") or []
    if len(chain) > 1:
        rendered = [f"[SEED] {_raw(chain[0])}"]
        rendered.extend(f" └─ [Pivot] → {_raw(node)}" for node in chain[1:])
        return rendered

    # No pivot data — reconstruct best-effort from records
    rendered = [f"[SEED] {seed}"]
    known: set = {seed.lower()}
    for record in data.get("records", [])[:40]:
        source = _raw(_rget(record, "source"))
        email = _raw(_rget(record, "email"))
        username = _raw(_rget(record, "username"))
        identity = email or username
        if not identity:
            continue
        folded = identity.lower()
        if folded in known:
            continue
        known.add(folded)
        rendered.append(f" └─ [{source}] → {identity}")

    for hit in (data.get("dork_results") or [])[:5]:
        url = _raw(hit.get("url", ""))
        if not url or url.lower() in known:
            continue
        known.add(url.lower())
        rendered.append(f" └─ [Dork] → {url[:80]}")

    return rendered if len(rendered) > 1 else placeholder
|
||||
|
||||
|
||||
# ── JSON report ───────────────────────────────────────────────────────
|
||||
|
||||
def to_json(data: dict, path: str) -> None:
    """Write the scan as a JSON report to *path* and print a confirmation.

    The report holds the executive summary, the rendered pivot chain, the
    records (with the raw_data/metadata fields and any noise-matching values
    dropped, capped at 1000), the dork and scrape results, and a
    self-describing ``_meta`` block.

    Args:
        data: Scan result dict (target, records, dork_results, scrape_results, ...).
        path: Destination file path; written as UTF-8.
    """
    from enum import Enum

    summary = build_exec_summary(data)
    chain = render_pivot_chain(data)
    records = data.get("records") or []

    def _ser(o):
        """json.dumps fallback: enum name, to_dict() result, else str()."""
        if isinstance(o, Enum):
            return o.name
        if hasattr(o, "to_dict"):
            return o.to_dict()
        return str(o)

    clean_records = []
    for r in records:
        d = r.to_dict() if hasattr(r, "to_dict") else (r if isinstance(r, dict) else {})
        # drop noise fields
        clean_records.append({
            k: v for k, v in d.items()
            if k not in ("raw_data", "metadata") and not _NOISE_RE.search(str(v or ""))
        })

    version = _nox_ver()
    target = data.get("target", "")
    # One timestamp for the whole report, so "generated", "_meta.timestamp"
    # and the scan_id seed can never disagree.
    generated = datetime.now().isoformat()

    # Include dork and scrape results in JSON output
    dork_results = data.get("dork_results", []) or []
    scrape_results = data.get("scrape_results", {}) or {}

    # D3: apply consistent cap (1000) — same as HTML
    _RECORD_CAP = 1000

    out_data = {
        "framework": f"NOX v{version}",
        "generated": generated,
        "target": target,
        # J3: self-describing metadata block
        "_meta": {
            "scan_id": hashlib.sha256(f"{target}{generated}".encode()).hexdigest()[:16],
            "target": target,
            "timestamp": generated,
            "nox_version": version,
            "sources_queried": summary.get("total_records", 0),
            "pivot_depth_reached": summary.get("pivot_depth", 0),
            "record_cap": _RECORD_CAP,
            "truncated": len(clean_records) > _RECORD_CAP,
        },
        "executive_summary": summary,
        "pivot_chain": chain,
        "records": clean_records[:_RECORD_CAP],
        "dork_results": dork_results,
        "scrape_results": scrape_results,
    }
    Path(path).write_text(json.dumps(out_data, indent=2, default=_ser), encoding="utf-8")
    print(f"[+] JSON report saved: {path}")
|
||||
|
||||
|
||||
# ── HTML report ───────────────────────────────────────────────────────
|
||||
|
||||
_CSS = (
|
||||
"*{margin:0;padding:0;box-sizing:border-box}"
|
||||
"body{font-family:'Courier New',monospace;background:#0a0a0a;color:#e0e0e0;padding:20px}"
|
||||
".hdr{text-align:center;padding:28px;border:1px solid #333;margin-bottom:18px;background:#111}"
|
||||
".hdr h1{color:#00ff41;font-size:26px;letter-spacing:4px}"
|
||||
".hdr p{color:#888;margin-top:5px;font-size:12px}"
|
||||
".kpis{display:grid;grid-template-columns:repeat(auto-fit,minmax(160px,1fr));gap:10px;margin:14px 0}"
|
||||
".kpi{background:#111;border:1px solid #333;padding:16px;text-align:center}"
|
||||
".kpi .n{font-size:30px;font-weight:bold;color:#00ff41}"
|
||||
".kpi .l{color:#888;font-size:10px;margin-top:3px}"
|
||||
".kpi.warn .n{color:#ff6600} .kpi.crit .n{color:#ff0040}"
|
||||
".sec{margin:18px 0} .sec h2{color:#00ff41;border-bottom:1px solid #333;padding-bottom:5px;margin-bottom:10px}"
|
||||
".chain{background:#0d1a0d;border:1px solid #1a3a1a;padding:12px;font-size:11px;color:#00cc33;word-break:break-all;margin:8px 0}"
|
||||
"table{width:100%;border-collapse:collapse} th,td{padding:7px;border:1px solid #222;font-size:11px;word-break:break-all}"
|
||||
"th{background:#1a1a1a;color:#00ff41;text-transform:uppercase;font-size:10px} td{background:#0d0d0d}"
|
||||
"tr.c td{background:#1a0005} tr.h td{background:#1a0a00} tr.m td{background:#1a1500}"
|
||||
".pw{color:#ff0040;font-weight:bold}"
|
||||
)
|
||||
|
||||
|
||||
def _safe_href(url: Any) -> str:
    """Return the escaped href value for *url*, or "" unless it is an http(s) URL."""
    text = str(url or "").strip()
    # Allow-list the scheme: scraped URLs are untrusted, and escaping alone
    # does not stop a "javascript:" or "data:" link from being clickable.
    if not text.lower().startswith(("http://", "https://")):
        return ""
    return _clean(text)


def to_html(data: dict, path: str) -> None:
    """Write the scan as a self-contained HTML report to *path*.

    Sections: executive summary (KPI tiles + severity table), pivot chain,
    dork results, scrape results (pastes, extracted credentials, Telegram,
    misconfigurations) and the first 500 credential records. Every
    interpolated value is sanitized and HTML-escaped; URLs become links only
    when they use http or https.

    Args:
        data: Scan result dict (target, records, dork_results, scrape_results, ...).
        path: Destination file path; written as UTF-8.
    """
    from datetime import timezone  # local import: only `datetime` is imported at file level

    summary = build_exec_summary(data)
    chain = render_pivot_chain(data)
    target = _clean(data.get("target", "Unknown"))
    records = data.get("records") or []
    # The label says UTC, so take the time in UTC rather than local time.
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
    # rel="noopener noreferrer" keeps the opened page from reaching back into
    # the report through window.opener.
    new_tab = 'target="_blank" rel="noopener noreferrer"'

    # KPI dashboard
    kpis = (
        f'<div class="kpi"><div class="n">{summary["total_records"]}</div><div class="l">TOTAL RECORDS</div></div>'
        f'<div class="kpi"><div class="n">{summary["nodes_discovered"]}</div><div class="l">NODES DISCOVERED</div></div>'
        f'<div class="kpi crit"><div class="n">{summary["cleartext_passwords"]}</div><div class="l">CLEARTEXT PASSWORDS</div></div>'
        f'<div class="kpi warn"><div class="n">{summary["hvt_count"]}</div><div class="l">HIGH-VALUE TARGETS</div></div>'
        f'<div class="kpi"><div class="n">{summary["pivot_depth"]}</div><div class="l">PIVOT DEPTH</div></div>'
        f'<div class="kpi"><div class="n">{summary["elapsed"]}</div><div class="l">TOTAL TIME</div></div>'
    )

    # Severity table
    sev_rows = "".join(
        f"<tr><td>{lvl}</td><td>{cnt}</td></tr>"
        for lvl, cnt in summary["buckets"].items() if cnt
    )

    # Pivot chain
    chain_html = "".join(f'<div class="chain">{_clean(c)}</div>' for c in chain)

    # Credential rows (top 500, noise-free)
    cred_parts: List[str] = []
    for r in records[:500]:
        rs = float(_rget(r, "risk_score") or 0)
        cls = "c" if rs >= 90 else "h" if rs >= 70 else "m" if rs >= 40 else ""
        em = _clean(_rget(r, "email") or _rget(r, "username"))
        pw = _clean(_rget(r, "password"))
        src = _clean(_rget(r, "source"))
        bd = _clean(_rget(r, "breach_date"))
        hvt = " ⚑" if getattr(r, "is_hvt", False) or (isinstance(r, dict) and r.get("is_hvt")) else ""
        cred_parts.append(
            f"<tr class='{cls}'><td>{em}{hvt}</td>"
            f"<td class='pw'>{pw}</td><td>{src}</td><td>{bd}</td><td>{rs:.0f}</td></tr>"
        )
    cred_rows = "".join(cred_parts)

    # Dork results section
    dork_results = data.get("dork_results", []) or []
    dork_parts: List[str] = []
    for h in dork_results:
        # `or ""` guards: a key present with a None value must not crash the slicing below.
        url = h.get("url") or ""
        dork = h.get("dork") or ""
        title = h.get("title") or dork
        snippet = h.get("snippet") or ""
        engine = h.get("engine") or ""
        href = _safe_href(url)
        if href:
            link = f'<a href="{href}" style="color:#00ff41" {new_tab}>{_clean(url[:80])}</a>'
        elif url:
            # Not an http(s) URL: show it as plain text, never as a link.
            link = _clean(url[:80])
        else:
            link = _clean(title[:80])
        dork_parts.append(
            f"<tr><td>{link}</td><td>{_clean(snippet[:120])}</td>"
            f"<td>{_clean(dork[:80])}</td><td>{_clean(engine)}</td></tr>"
        )
    dork_rows = "".join(dork_parts)
    dork_section = (
        f'<div class="sec"><h2>Dork Results ({len(dork_results)} hits)</h2>'
        f'<table><thead><tr><th>URL / Title</th><th>Snippet</th><th>Dork Query</th><th>Engine</th></tr></thead>'
        f'<tbody>{dork_rows if dork_rows else "<tr><td colspan=4 style=text-align:center>No dork hits</td></tr>"}</tbody></table></div>'
    )

    # Scrape results section
    scrape_results = data.get("scrape_results", {}) or {}
    pastes = scrape_results.get("pastes") or []
    creds_sc = scrape_results.get("credentials") or []
    tg_hits = scrape_results.get("telegram") or []
    mc_hits = scrape_results.get("dork_misconfigs") or []

    paste_parts: List[str] = []
    for p in pastes:
        site = _clean(p.get("site", ""))
        pid = p.get("id", "")
        pats = _clean(", ".join(f"{k}({len(v)})" for k, v in (p.get("patterns") or {}).items()))
        paste_parts.append(f"<tr><td>{site}</td><td>{_clean(pid)}</td><td>{pats}</td></tr>")
    paste_rows = "".join(paste_parts)

    cred_sc_rows = "".join(
        f"<tr><td class='pw'>{_clean((c.get('raw') or '')[:120])}</td>"
        f"<td>{_clean(c.get('source',''))}</td><td>{_clean(c.get('paste_id',''))}</td></tr>"
        for c in creds_sc
    )

    tg_parts: List[str] = []
    for t in tg_hits:
        ch = _clean(t.get("channel", ""))
        text = _clean((t.get("text") or "")[:200])
        pats = _clean(", ".join(f"{k}({len(v)})" for k, v in (t.get("patterns") or {}).items()))
        link = f'<a href="https://t.me/s/{ch}" style="color:#00ff41" {new_tab}>t.me/s/{ch}</a>'
        tg_parts.append(f"<tr><td>{link}</td><td>{text}</td><td>{pats}</td></tr>")
    tg_rows = "".join(tg_parts)

    mc_parts: List[str] = []
    for m in mc_hits:
        url_m = m.get("url") or ""
        title_m = _clean((m.get("title") or "")[:80])
        dork_m = _clean((m.get("dork") or "")[:80])
        href_m = _safe_href(url_m)
        if href_m:
            link_m = f'<a href="{href_m}" style="color:#ff0040" {new_tab}>{_clean(url_m[:80])}</a>'
        elif url_m:
            link_m = _clean(url_m[:80])
        else:
            link_m = title_m
        mc_parts.append(f"<tr><td>{link_m}</td><td>{title_m}</td><td>{dork_m}</td></tr>")
    mc_rows = "".join(mc_parts)

    scrape_section = (
        f'<div class="sec"><h2>Scrape Results</h2>'
        f'<h3 style="color:#aaa;margin:10px 0 5px">Pastes ({len(pastes)})</h3>'
        f'<table><thead><tr><th>Site</th><th>Paste ID</th><th>Patterns</th></tr></thead>'
        f'<tbody>{paste_rows or "<tr><td colspan=3 style=text-align:center>None</td></tr>"}</tbody></table>'
        f'<h3 style="color:#aaa;margin:10px 0 5px">Extracted Credentials ({len(creds_sc)})</h3>'
        f'<table><thead><tr><th>Raw Credential</th><th>Source</th><th>Paste ID</th></tr></thead>'
        f'<tbody>{cred_sc_rows or "<tr><td colspan=3 style=text-align:center>None</td></tr>"}</tbody></table>'
        f'<h3 style="color:#aaa;margin:10px 0 5px">Telegram CTI ({len(tg_hits)})</h3>'
        f'<table><thead><tr><th>Channel</th><th>Message</th><th>Patterns</th></tr></thead>'
        f'<tbody>{tg_rows or "<tr><td colspan=3 style=text-align:center>None</td></tr>"}</tbody></table>'
        f'<h3 style="color:#aaa;margin:10px 0 5px">Misconfigurations ({len(mc_hits)})</h3>'
        f'<table><thead><tr><th>URL</th><th>Title</th><th>Dork</th></tr></thead>'
        f'<tbody>{mc_rows or "<tr><td colspan=3 style=text-align:center>None</td></tr>"}</tbody></table>'
        f'</div>'
    )

    page = (
        f'<!DOCTYPE html><html><head><meta charset="utf-8">'
        f'<title>NOX — {target}</title><style>{_CSS}</style></head><body>'
        f'<div class="hdr"><h1>[ NOX ]</h1>'
        f'<p>Target: {target} | {ts} | NOX v{_nox_ver()}</p></div>'
        f'<div class="sec"><h2>Executive Summary</h2>'
        f'<div class="kpis">{kpis}</div>'
        f'<table><thead><tr><th>Severity</th><th>Count</th></tr></thead>'
        f'<tbody>{sev_rows}</tbody></table></div>'
        f'<div class="sec"><h2>Pivot Chain</h2>{chain_html}</div>'
        f'{dork_section}'
        f'{scrape_section}'
        f'<div class="sec"><h2>Credential Records (top 500)</h2>'
        f'<table><thead><tr><th>Identity</th><th>Password</th><th>Source</th>'
        f'<th>Date</th><th>Risk</th></tr></thead><tbody>{cred_rows}</tbody></table></div>'
        f'</body></html>'
    )
    Path(path).write_text(page, encoding="utf-8")
    print(f"[+] HTML report saved: {path}")
|
||||
|
||||
|
||||
# ── PDF report (fpdf2) ────────────────────────────────────────────────
|
||||
|
||||
def to_pdf(data: dict, path: str, investigator_id: str = "NOX-AUTO") -> None:
    """Write the investigation results in ``data`` to a PDF report at ``path``.

    Sections, in order: cover page, executive summary (KPIs plus severity
    breakdown), pivot chain, credential findings (first 500 records) and,
    only when they contain data, dork results and scrape results.

    Args:
        data: Report payload. Reads ``target``, ``records``, ``dork_results``
            and ``scrape_results``; it is also handed to
            ``build_exec_summary`` and ``render_pivot_chain``.
        path: Destination file for the PDF.
        investigator_id: Label printed on the cover page.

    Raises:
        RuntimeError: If fpdf2 is not installed.
    """
    # D1: raise a clear error with install hint if fpdf2 is absent — never silently return.
    try:
        from fpdf import FPDF  # type: ignore
    except ImportError as exc:
        msg = "[!] fpdf2 not installed — PDF report cannot be generated. Run: pip install fpdf2"
        print(msg)
        raise RuntimeError(msg) from exc
    # Function-scope import so the fix does not depend on the file's import block.
    from datetime import timezone

    summary = build_exec_summary(data)
    chain = render_pivot_chain(data)
    target = _raw(data.get("target", "Unknown"))
    records = data.get("records", [])
    # The stamp is labelled "UTC", so it must come from a UTC clock rather
    # than the machine's local time.
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")

    class _PDF(FPDF):
        """FPDF subclass adding the running header and page-number footer."""

        def header(self):
            self.set_font("Helvetica", "B", 8)
            self.set_text_color(120, 120, 120)
            self.cell(0, 5, "NOX - FORENSIC INTELLIGENCE REPORT - CONFIDENTIAL", align="R")
            self.ln(3)

        def footer(self):
            self.set_y(-12)
            self.set_font("Helvetica", "", 8)
            self.set_text_color(150, 150, 150)
            self.cell(0, 5, _pdf_safe(f"Page {self.page_no()} | {target[:50]}"), align="C")

    pdf = _PDF(orientation="P", unit="mm", format="A4")
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.set_margins(15, 15, 15)

    def _page_heading(title):
        """Start a new page with an underlined 13pt section title."""
        pdf.add_page()
        pdf.set_font("Helvetica", "B", 13)
        pdf.set_text_color(0, 0, 0)
        pdf.cell(0, 9, title, ln=True)
        pdf.line(15, pdf.get_y(), 195, pdf.get_y())
        pdf.ln(3)

    def _write_col_headers(columns):
        """Draw a white-on-dark header row for ``columns`` [(name, width)]."""
        pdf.set_font("Helvetica", "B", 8)
        pdf.set_fill_color(30, 30, 30)
        pdf.set_text_color(255, 255, 255)
        for col_name, col_w in columns:
            pdf.cell(col_w, 6, col_name, border=1, fill=True)
        pdf.ln()
        pdf.set_text_color(0, 0, 0)

    def _write_row(values, columns, fill_rgb):
        """Draw one bordered 7pt data row filled with ``fill_rgb``."""
        pdf.set_fill_color(*fill_rgb)
        pdf.set_font("Helvetica", "", 7)
        for val, (_, col_w) in zip(values, columns):
            pdf.cell(col_w, 5, val, border=1, fill=True)
        pdf.ln()

    def _sub_table(title, columns, rows, fill_rgb):
        """Draw a 10pt sub-heading followed by a header row and data rows."""
        pdf.set_font("Helvetica", "B", 10)
        pdf.cell(0, 7, _pdf_safe(title), ln=True)
        _write_col_headers(columns)
        for row in rows:
            _write_row(row, columns, fill_rgb)

    def _pattern_summary(item):
        """Format an item's ``patterns`` mapping as 'name(count), …'."""
        return ", ".join(f"{k}({len(v)})" for k, v in (item.get("patterns") or {}).items())

    # ── Cover page ────────────────────────────────────────────────────
    pdf.add_page()
    pdf.set_fill_color(15, 15, 15)
    pdf.rect(0, 0, 210, 297, "F")  # full-bleed dark background (A4 in mm)
    pdf.set_y(65)
    pdf.set_font("Helvetica", "B", 26)
    pdf.set_text_color(0, 220, 60)
    pdf.cell(0, 12, "FORENSIC INTELLIGENCE REPORT", align="C")
    pdf.ln(8)
    pdf.set_font("Helvetica", "B", 13)
    pdf.set_text_color(200, 200, 200)
    pdf.cell(0, 8, _pdf_safe(f"Target: {target}"), align="C")
    pdf.ln(6)
    pdf.set_font("Helvetica", "", 10)
    pdf.set_text_color(140, 140, 140)
    for line in [f"Generated: {ts}", f"Investigator: {investigator_id}",
                 f"Framework: NOX v{_nox_ver()}", "Classification: RESTRICTED"]:
        pdf.cell(0, 6, _pdf_safe(line), align="C")
        pdf.ln(5)

    # ── Executive Summary ─────────────────────────────────────────────
    pdf.add_page()
    pdf.set_fill_color(255, 255, 255)
    pdf.set_text_color(0, 0, 0)
    pdf.set_font("Helvetica", "B", 15)
    pdf.cell(0, 10, "Executive Summary", ln=True)
    pdf.set_draw_color(0, 180, 50)
    pdf.set_line_width(0.4)
    pdf.line(15, pdf.get_y(), 195, pdf.get_y())
    pdf.ln(4)

    kpis = [
        ("Total Time", summary["elapsed"]),
        ("Nodes Discovered", str(summary["nodes_discovered"])),
        ("Cleartext Passwords Found", str(summary["cleartext_passwords"])),
        ("Pivot Depth", str(summary["pivot_depth"])),
        ("Total Records", str(summary["total_records"])),
        ("High-Value Targets", str(summary["hvt_count"])),
    ]
    pdf.set_font("Helvetica", "B", 10)
    for label, value in kpis:
        # Bold, shaded label cell followed by a regular-weight value cell.
        pdf.set_fill_color(245, 245, 245)
        pdf.cell(95, 7, _pdf_safe(label), border=1, fill=True)
        pdf.set_font("Helvetica", "", 10)
        pdf.cell(80, 7, _pdf_safe(value), border=1, ln=True)
        pdf.set_font("Helvetica", "B", 10)
    pdf.ln(4)

    # Severity breakdown
    pdf.set_font("Helvetica", "B", 11)
    pdf.cell(0, 7, "Severity Breakdown", ln=True)
    _sev_c = {"Critical": (220, 0, 30), "High": (220, 100, 0),
              "Medium": (200, 180, 0), "Low": (0, 150, 50), "Info": (100, 100, 100)}
    # max(…, 1) keeps the bar-width division safe when every bucket is empty.
    total_b = max(sum(summary["buckets"].values()), 1)
    for level, count in summary["buckets"].items():
        pdf.set_font("Helvetica", "", 9)
        pdf.cell(35, 6, _pdf_safe(level), border=1)
        pdf.cell(20, 6, str(count), border=1)
        bar_w = int(count / total_b * 120)
        x, y = pdf.get_x(), pdf.get_y()
        pdf.cell(125, 6, "", border=1)  # empty frame; the bar is painted inside it
        if bar_w:
            pdf.set_fill_color(*_sev_c.get(level, (100, 100, 100)))
            pdf.rect(x + 1, y + 1, bar_w, 4, "F")
        pdf.ln()

    # ── Pivot Chain ───────────────────────────────────────────────────
    pdf.ln(5)
    pdf.set_font("Helvetica", "B", 11)
    pdf.cell(0, 7, "Pivot Chain Visualization", ln=True)
    pdf.line(15, pdf.get_y(), 195, pdf.get_y())
    pdf.ln(3)
    pdf.set_font("Courier", "", 8)
    pdf.set_fill_color(240, 255, 240)
    for c_line in chain:
        # Hard-wrap long chains at 100 chars; max(…, 1) still emits one
        # (empty) row for an empty line.
        for start in range(0, max(len(c_line), 1), 100):
            pdf.set_x(15)
            pdf.cell(180, 5, _pdf_safe(c_line[start:start + 100]), border=0, ln=True, fill=True)
    pdf.ln(3)

    # ── Credential Findings ───────────────────────────────────────────
    _page_heading("Credential Findings")
    cols = [("Identity", 60), ("Password", 45), ("Source", 35), ("Date", 25), ("Risk", 15)]
    _write_col_headers(cols)

    for r in records[:500]:
        pw = _rget(r, "password")
        if not pw and not _rget(r, "email") and not _rget(r, "username"):
            continue  # skip noise rows with no actionable data
        try:
            rs = float(_rget(r, "risk_score") or 0)
        except (TypeError, ValueError):
            rs = 0.0  # a malformed score must not abort the whole report
        # Page-break with repeated column headers (§5.1). This runs BEFORE the
        # row styling is chosen: the header helper switches to the dark fill
        # and bold font, which would otherwise bleed into the first row of
        # every continuation page.
        if pdf.get_y() > pdf.h - 25:
            pdf.add_page()
            _write_col_headers(cols)
        if rs >= 90:
            row_fill = (255, 220, 220)
        elif rs >= 70:
            row_fill = (255, 240, 220)
        else:
            row_fill = (255, 255, 255)
        _write_row(
            [
                _pdf_safe(_rget(r, "email") or _rget(r, "username"), 38),
                _pdf_safe(pw, 28),
                _pdf_safe(_rget(r, "source"), 22),
                _pdf_safe(_rget(r, "breach_date"), 14),
                f"{rs:.0f}",
            ],
            cols,
            row_fill,
        )

    # ── Dork Results ─────────────────────────────────────────────────
    dork_results = data.get("dork_results", []) or []
    if dork_results:
        _page_heading(_pdf_safe(f"Dork Results ({len(dork_results)} hits)"))
        dork_cols = [("URL / Title", 95), ("Snippet", 55), ("Engine", 30)]
        _write_col_headers(dork_cols)
        for h in dork_results[:200]:
            _write_row(
                [
                    _pdf_safe(h.get("url", h.get("title", "")), 65),
                    _pdf_safe(h.get("snippet", ""), 38),
                    _pdf_safe(h.get("engine", ""), 20),
                ],
                dork_cols,
                (245, 245, 255),
            )

    # ── Scrape Results ────────────────────────────────────────────────
    scrape_results = data.get("scrape_results", {}) or {}
    pastes = scrape_results.get("pastes", [])
    creds_sc = scrape_results.get("credentials", [])
    tg_hits = scrape_results.get("telegram", [])
    mc_hits = scrape_results.get("dork_misconfigs", [])

    if pastes or creds_sc or tg_hits or mc_hits:
        _page_heading("Scrape Results")

        if pastes:
            _sub_table(
                f"Pastes ({len(pastes)})",
                [("Site", 25), ("Paste ID", 80), ("Patterns", 75)],
                (
                    [
                        _pdf_safe(p.get("site", ""), 15),
                        _pdf_safe(p.get("id", ""), 55),
                        _pdf_safe(_pattern_summary(p), 50),
                    ]
                    for p in pastes[:100]
                ),
                (245, 245, 245),
            )
            pdf.ln(3)

        if creds_sc:
            _sub_table(
                f"Extracted Credentials ({len(creds_sc)})",
                [("Raw Credential", 120), ("Source", 30), ("Paste ID", 30)],
                (
                    [
                        _pdf_safe(c.get("raw", ""), 80),
                        _pdf_safe(c.get("source", ""), 20),
                        _pdf_safe(c.get("paste_id", ""), 20),
                    ]
                    for c in creds_sc[:150]
                ),
                (255, 240, 240),
            )
            pdf.ln(3)

        if tg_hits:
            _sub_table(
                f"Telegram CTI ({len(tg_hits)})",
                [("Channel", 50), ("Message Excerpt", 100), ("Patterns", 30)],
                (
                    [
                        _pdf_safe(f"t.me/s/{t.get('channel','')}", 35),
                        _pdf_safe(t.get("text", ""), 70),
                        _pdf_safe(_pattern_summary(t), 25),
                    ]
                    for t in tg_hits[:80]
                ),
                (245, 245, 255),
            )
            pdf.ln(3)

        if mc_hits:
            _sub_table(
                f"Misconfigurations ({len(mc_hits)})",
                [("URL", 90), ("Title", 60), ("Dork", 30)],
                (
                    [
                        _pdf_safe(m.get("url", ""), 60),
                        _pdf_safe(m.get("title", ""), 40),
                        _pdf_safe(m.get("dork", ""), 25),
                    ]
                    for m in mc_hits[:80]
                ),
                (255, 245, 230),
            )

    pdf.output(path)
    print(f"[+] PDF report saved: {path}")
|
||||
@@ -0,0 +1,525 @@
|
||||
"""
|
||||
sources/helpers/scanner.py
|
||||
Recursive Avalanche Engine for NOX autoscan.
|
||||
|
||||
Pipeline per asset (sequential phases):
|
||||
Phase 1 — Breach scan
|
||||
Phase 2 — Hash crack (non-blocking, on breach results)
|
||||
Phase 3 — Dork
|
||||
Phase 4 — Scrape
|
||||
→ Harvest new identifiers from all phases
|
||||
→ Reinject every new unique identifier (not seen before) recursively
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import re
|
||||
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from nox import Orchestrator
|
||||
|
||||
_syslog = logging.getLogger("nox.system")
|
||||
|
||||
_EMAIL_RE = re.compile(r"[\w.+-]+@[\w-]+\.[\w.]+")
|
||||
_USERNAME_RE = re.compile(r"(?:github\.com|twitter\.com|linkedin\.com/in|reddit\.com/u)/([A-Za-z0-9_.-]{3,39})", re.I)
|
||||
_PHONE_RE = re.compile(r"\+\d[\d\s.\-()]{7,14}\d|\b\d{3}[\s.\-]\d{3}[\s.\-]\d{4}\b")
|
||||
_NAME_RE = re.compile(r"\b([A-Z][a-z]{1,20}(?:\s+[A-Z][a-z]{1,20}){1,3})\b")
|
||||
|
||||
_DORK_LIMIT = 20
|
||||
_PIVOT_TYPES = {"email", "username", "phone", "name", "ip", "domain"}
|
||||
|
||||
|
||||
def _cfg_depth(orc=None) -> int:
|
||||
# A7/A10: read from orchestrator config if available
|
||||
if orc is not None:
|
||||
cfg = getattr(orc, "config", None)
|
||||
if cfg is not None:
|
||||
v = getattr(cfg, "pivot_depth", None)
|
||||
if v is not None:
|
||||
return int(v)
|
||||
try:
|
||||
from nox import Cfg # type: ignore
|
||||
return Cfg.PIVOT_DEPTH
|
||||
except ImportError:
|
||||
return 2
|
||||
|
||||
|
||||
def _cfg_concurrency(orc=None) -> int:
|
||||
# A7: read from orchestrator config if available
|
||||
if orc is not None:
|
||||
cfg = getattr(orc, "config", None)
|
||||
if cfg is not None:
|
||||
v = getattr(cfg, "concurrency", None)
|
||||
if v is not None:
|
||||
return int(v)
|
||||
try:
|
||||
from nox import Cfg # type: ignore
|
||||
return Cfg.CONCURRENCY
|
||||
except ImportError:
|
||||
return 15
|
||||
|
||||
|
||||
def _out(level: str, msg: str) -> None:
|
||||
try:
|
||||
from nox import out as _nox_out # type: ignore
|
||||
_nox_out(level, msg)
|
||||
except Exception:
|
||||
import sys
|
||||
print(f"[{level}] {msg}", file=sys.stderr)
|
||||
|
||||
|
||||
def _extract_ids_from_text(text: str, exclude: str = "") -> List[Tuple[str, str]]:
|
||||
"""Extract pivotable identifiers from free text, excluding the current asset."""
|
||||
found: List[Tuple[str, str]] = []
|
||||
excl = exclude.lower()
|
||||
for m in _EMAIL_RE.findall(text):
|
||||
v = m.lower()
|
||||
if v != excl:
|
||||
found.append((v, "email"))
|
||||
for m in _USERNAME_RE.findall(text):
|
||||
v = m.lower()
|
||||
if v != excl:
|
||||
found.append((v, "username"))
|
||||
for m in _PHONE_RE.findall(text):
|
||||
clean = re.sub(r"[\s.\-()]", "", m)
|
||||
if 8 <= len(clean) <= 15 and clean != excl:
|
||||
found.append((clean, "phone"))
|
||||
for m in _NAME_RE.findall(text):
|
||||
if len(m.split()) >= 2 and m.lower() != excl:
|
||||
found.append((m, "name"))
|
||||
return found
|
||||
|
||||
|
||||
def _ids_from_records(records: list, exclude: str = "") -> List[Tuple[str, str, str]]:
|
||||
"""
|
||||
Extract pivotable identifiers from breach records.
|
||||
Returns (value, qtype, ref) where ref is the source/breach name for logging.
|
||||
"""
|
||||
found: List[Tuple[str, str, str]] = []
|
||||
excl = exclude.lower()
|
||||
for r in records:
|
||||
src = getattr(r, "source", "") or ""
|
||||
breach = getattr(r, "breach_name", "") or src
|
||||
for val, qtype in [
|
||||
(getattr(r, "email", ""), "email"),
|
||||
(getattr(r, "username", ""), "username"),
|
||||
(getattr(r, "phone", ""), "phone"),
|
||||
(getattr(r, "full_name", ""), "name"),
|
||||
(getattr(r, "ip_address", ""), "ip"),
|
||||
(getattr(r, "domain", ""), "domain"),
|
||||
]:
|
||||
if val and len(val) > 2 and val.lower() != excl:
|
||||
found.append((val.strip(), qtype, breach))
|
||||
meta = getattr(r, "metadata", {}) or {}
|
||||
for em in meta.get("emails", []):
|
||||
if em and em.lower() != excl:
|
||||
found.append((em.lower(), "email", breach))
|
||||
return found
|
||||
|
||||
|
||||
# ── Pivot log entry schema ─────────────────────────────────────────────────
|
||||
# {
|
||||
# "asset": str, # identifier scanned
|
||||
# "qtype": str, # email/username/phone/name/domain/ip
|
||||
# "depth": int, # 0=seed, 1=first pivot, …
|
||||
# "parent": str|None, # asset that discovered this one
|
||||
# "found_in": str, # phase that found this asset: seed/breach/dork/scrape/hash_crack
|
||||
# "records": int, # breach records found for this asset
|
||||
# "dorks": int, # dork hits found for this asset
|
||||
# "scrape": int, # scrape items found for this asset
|
||||
# "children": List[dict], # [{asset, qtype, found_in, ref}] — new assets discovered
|
||||
# "cracked": List[str], # plaintexts cracked from hashes in breach results
|
||||
# }
|
||||
|
||||
|
||||
class AvalancheScanner:
    """Recursive scanner that re-injects every newly discovered identifier.

    For each asset the pipeline runs breach scan → hash crack → dork → scrape,
    harvests identifiers from the results, and processes every identifier not
    seen before one level deeper, up to the configured pivot depth.
    """

    def __init__(self, orchestrator: "Orchestrator") -> None:
        """Bind the scanner to ``orchestrator`` and reset all run state."""
        self._orc = orchestrator
        # Normalised (lower-cased, stripped) keys of every asset already claimed.
        self.seen_assets: Set[str] = set()
        # A2: single semaphore for the entire run, created lazily inside the event loop
        self._sem: Optional[asyncio.Semaphore] = None
        self._all_records: List = []
        self._dork_hits: List[dict] = []
        self._seen_dork_urls: Set[str] = set()
        # A6: scrape_hits merged atomically per _do_process call
        self._scrape_hits: Dict = {"pastes": [], "credentials": [], "hashes": [],
                                   "telegram": [], "dork_misconfigs": []}
        self._max_depth: int = 0
        # Assets currently being processed → future resolved when they finish.
        self._in_flight: Dict[str, asyncio.Future] = {}
        self.pivot_log: List[dict] = []
        # A8: global set to prevent duplicate entries in discovered_assets
        self._seen_discovered: Set[str] = set()
        self.discovered_assets: List[dict] = []

    def _get_sem(self) -> asyncio.Semaphore:
        """Return the run-wide breach-scan semaphore, creating it on first use."""
        # A2: semaphore created once per run, shared across all coroutines
        if self._sem is None:
            self._sem = asyncio.Semaphore(_cfg_concurrency(self._orc))
        return self._sem

    async def run(self, target: str) -> tuple:
        """Scan ``target`` and return ``(records, dork_hits, scrape_hits)``.

        With ``config.no_pivot`` set, only a single breach scan of the seed is
        performed; otherwise the full recursive pipeline runs.
        """
        # A9: respect no_pivot flag from config
        cfg = getattr(self._orc, "config", None)
        no_pivot = getattr(cfg, "no_pivot", False) if cfg else False
        if not no_pivot:
            await self._process(target, depth=0, parent=None, found_in="seed")
            return self._all_records, self._dork_hits, self._scrape_hits

        try:
            from nox import Detect  # type: ignore
            qtype = Detect.qtype(target)
        except ImportError:
            qtype = "email"
        async with self._get_sem():
            try:
                records = await self._orc._full_async_scan(target, qtype)
            except Exception as exc:
                # Best-effort like the pivoting path, but leave a trace so an
                # empty report can be told apart from a failed scan.
                _syslog.warning("BREACH_FAIL asset=%s err=%s", target, exc)
                records = []
        self._all_records.extend(records)
        self.seen_assets.add(target.lower().strip())
        self.pivot_log.append({
            "asset": target, "qtype": qtype, "depth": 0, "parent": None,
            "found_in": "seed", "records": len(records), "dorks": 0,
            "scrape": 0, "children": [], "cracked": [],
        })
        return self._all_records, self._dork_hits, self._scrape_hits

    def get_discovered_assets(self) -> List[dict]:
        """Return flat list of all discovered assets with full provenance."""
        return self.discovered_assets

    def get_max_depth(self) -> int:
        """Return the deepest pivot level that was actually processed."""
        return self._max_depth

    # ── Dedup gate ────────────────────────────────────────────────────

    async def _process(self, asset: str, depth: int,
                       parent: Optional[str], found_in: str) -> None:
        """Dedup gate: ensures each asset is processed exactly once.

        Assets beyond the configured depth and blank assets are ignored. A
        duplicate request for an asset that is still being processed waits
        for that processing to finish before returning.
        """
        # A10: use per-run depth from orchestrator config
        if depth > _cfg_depth(self._orc):
            _syslog.debug("avalanche depth cap reached for %s", asset)
            return

        key = asset.lower().strip()
        if not key:
            return

        # A1: seen_assets is the gate. There is no await between this check
        # and the add below, so only one coroutine can ever claim a key.
        if key in self.seen_assets:
            pending = self._in_flight.get(key)
            if pending is not None:
                try:
                    await pending
                except Exception:
                    # The future is only ever resolved with None; this guard
                    # is purely defensive.
                    pass
            return

        self.seen_assets.add(key)
        fut: asyncio.Future = asyncio.get_running_loop().create_future()
        self._in_flight[key] = fut
        try:
            await self._do_process(asset, depth, parent, found_in)
        finally:
            # Release the entry so finished futures do not pile up for the
            # whole run; later duplicates are stopped by seen_assets alone.
            self._in_flight.pop(key, None)
            if not fut.done():
                fut.set_result(None)

    # ── Core pipeline ─────────────────────────────────────────────────

    async def _do_process(self, asset: str, depth: int,
                          parent: Optional[str], found_in: str) -> None:
        """
        Sequential pipeline:
        Phase 1 — Breach scan
        Phase 2 — Hash crack (concurrent, non-blocking)
        Phase 3 — Dork
        Phase 4 — Scrape
        → Harvest all new identifiers with phase+ref annotation
        → Reinject every unseen identifier
        """
        if depth > self._max_depth:
            self._max_depth = depth

        try:
            from nox import Detect  # type: ignore
            qtype = Detect.qtype(asset)
        except ImportError:
            qtype = "email"

        indent = " " * depth
        _out("pivot" if depth > 0 else "info",
             f"{indent}[depth={depth}] {'↳' if depth > 0 else '◉'} {asset} ({qtype})"
             + (f" ← {found_in} via {parent}" if parent else " [SEED]"))
        _syslog.info("AVALANCHE asset=%s depth=%d parent=%s found_in=%s",
                     asset, depth, parent or "—", found_in)

        # ── Phase 1: Breach scan ──────────────────────────────────────
        async with self._get_sem():
            try:
                records: List = await self._orc._full_async_scan(asset, qtype)
            except Exception as exc:
                _syslog.warning("BREACH_FAIL asset=%s err=%s", asset, exc)
                records = []

        _out("ok" if records else "dim",
             f"{indent} [breach] {len(records)} records")
        _syslog.info("BREACH_DONE asset=%s records=%d", asset, len(records))
        self._all_records.extend(records)

        # ── Phase 2: Hash crack (non-blocking) ────────────────────────
        cracked_plaintexts = await self._crack_hashes(records, depth, asset)

        # ── Phase 3: Dork ─────────────────────────────────────────────
        _out("info", f"{indent} [dork] querying for {asset}…")
        try:
            dork_res = await self._async_dork(asset, qtype)
        except Exception as exc:
            _syslog.warning("DORK_FAIL asset=%s err=%s", asset, exc)
            dork_res = []

        dork_count = 0
        for hit in (dork_res or [])[:_DORK_LIMIT]:
            url = hit.get("url", "") or hit.get("title", "")
            if url and url not in self._seen_dork_urls:
                self._seen_dork_urls.add(url)
                hit["pivot_asset"] = asset
                hit["pivot_depth"] = depth
                self._dork_hits.append(hit)
                dork_count += 1
        _out("ok" if dork_count else "dim",
             f"{indent} [dork] {dork_count} hits")
        _syslog.info("DORK_DONE asset=%s hits=%d", asset, dork_count)

        # ── Phase 4: Scrape ───────────────────────────────────────────
        _out("info", f"{indent} [scrape] querying for {asset}…")
        try:
            scrape_res = await self._async_scrape(asset)
        except Exception as exc:
            _syslog.warning("SCRAPE_FAIL asset=%s err=%s", asset, exc)
            scrape_res = {}

        # A6: collect scrape results locally, then merge atomically
        scrape_count = 0
        local_scrape: Dict = {k: [] for k in self._scrape_hits}
        for k in self._scrape_hits:
            for item in (scrape_res or {}).get(k, []):
                if isinstance(item, dict):
                    item["pivot_asset"] = asset
                    item["pivot_depth"] = depth
                local_scrape[k].append(item)
                scrape_count += 1
        # Atomic merge into shared dict (single-threaded event loop — safe)
        for k, items in local_scrape.items():
            self._scrape_hits[k].extend(items)
        _out("ok" if scrape_count else "dim",
             f"{indent} [scrape] {scrape_count} items")
        _syslog.info("SCRAPE_DONE asset=%s items=%d", asset, scrape_count)

        # ── Harvest new identifiers with phase+ref annotation ─────────
        new_ids = self._harvest_ids(asset, records, dork_res, scrape_res)

        # ── Deduplicate and queue children ────────────────────────────
        children: List[dict] = []
        child_tasks = []
        queued: Set[str] = set()

        for val, vqtype, phase, ref in new_ids:
            child_key = val.lower().strip()
            if not child_key or child_key in self.seen_assets or child_key in queued:
                continue
            queued.add(child_key)
            children.append({"asset": val, "qtype": vqtype, "found_in": phase, "ref": ref})
            # A8: prevent duplicate entries in discovered_assets across parallel parents
            if child_key not in self._seen_discovered:
                self._seen_discovered.add(child_key)
                self.discovered_assets.append({
                    "asset": val,
                    "qtype": vqtype,
                    "phase": phase,
                    "ref": ref,
                    "parent": asset,
                    "depth": depth + 1,
                })
            _out("pivot",
                 f"{indent} ↳ new asset [{phase}]: {val} ({vqtype}) ref: {ref[:60]}")
            _syslog.info("PIVOT_QUEUE asset=%s qtype=%s phase=%s ref=%s parent=%s depth=%d",
                         val, vqtype, phase, ref[:80], asset, depth + 1)
            child_tasks.append(
                self._process(val, depth + 1, parent=asset, found_in=phase)
            )

        # A5: run child tasks FIRST, then append pivot_log so the log reflects actual outcomes
        if child_tasks:
            _out("info", f"{indent} → reinjecting {len(child_tasks)} new asset(s)…")
            await asyncio.gather(*child_tasks, return_exceptions=True)

        # ── Log this node (after children complete — A5) ──────────────
        self.pivot_log.append({
            "asset": asset,
            "qtype": qtype,
            "depth": depth,
            "parent": parent,
            "found_in": found_in,
            "records": len(records),
            "dorks": dork_count,
            "scrape": scrape_count,
            "children": children,
            "cracked": cracked_plaintexts or [],
        })

    async def _crack_hashes(self, records: List, depth: int, asset: str) -> List[str]:
        """Crack every hash-only record concurrently; return the plaintexts found."""
        cracked: List[str] = []
        try:
            from sources.helpers.cracker import detect_hash  # type: ignore
            import aiohttp as _aio  # type: ignore
        except ImportError:
            return cracked  # cracking is optional: skip when its deps are absent

        candidates = [
            r for r in records
            if getattr(r, "password_hash", "") and not getattr(r, "password", "")
            and detect_hash(getattr(r, "password_hash", ""))
        ]
        if not candidates:
            return cracked  # nothing to crack: do not open an HTTP session

        async with _aio.ClientSession(connector=_aio.TCPConnector(limit=5)) as _cs:
            await asyncio.gather(
                *(
                    _crack_and_inject(_cs, getattr(r, "password_hash", ""), r,
                                      self.seen_assets, self._all_records,
                                      self, depth, asset, cracked)
                    for r in candidates
                ),
                return_exceptions=True,
            )
        return cracked

    @staticmethod
    def _paste_ref(cred: dict) -> str:
        """Return the provenance label for a scraped credential entry."""
        paste_id = cred.get("paste_id")
        if paste_id:
            return f"paste:{paste_id}"
        # No paste id: fall back to the entry's source instead of a bare "paste:".
        return str(cred.get("source") or "scrape")

    def _harvest_ids(self, asset: str, records: List, dork_res,
                     scrape_res) -> List[Tuple[str, str, str, str]]:
        """Collect ``(value, qtype, phase, ref)`` for every pivotable identifier."""
        new_ids: List[Tuple[str, str, str, str]] = []

        def _collect(text, phase: str, ref: str) -> None:
            for val, vqtype in _extract_ids_from_text(text, exclude=asset):
                if vqtype in _PIVOT_TYPES:
                    new_ids.append((val, vqtype, phase, ref))

        # From breach records
        for val, vqtype, ref in _ids_from_records(records, exclude=asset):
            if vqtype in _PIVOT_TYPES:
                new_ids.append((val, vqtype, "breach", ref))

        # From dork hits
        for hit in (dork_res or [])[:_DORK_LIMIT]:
            url = hit.get("url", "")
            dork = hit.get("dork", "")
            ref = url or dork
            text = f"{hit.get('title','')} {hit.get('snippet','')} {url} {dork}"
            _collect(text, "dork", ref[:120])

        # From scrape results
        scraped = scrape_res or {}
        for cred in scraped.get("credentials", []):
            _collect(cred.get("raw", ""), "scrape", self._paste_ref(cred))
        for tg in scraped.get("telegram", []):
            _collect(tg.get("text", ""), "scrape", f"t.me/{tg.get('channel','')}")
        for mc in scraped.get("dork_misconfigs", []):
            # `or` chain so a present-but-empty url still falls back to the title.
            ref = str(mc.get("url") or mc.get("title") or "misconfig")
            _collect(f"{mc.get('title','')} {mc.get('snippet','')}", "scrape", ref[:120])

        return new_ids

    # ── Dork dispatcher ───────────────────────────────────────────────

    async def _async_dork(self, asset: str, qtype: str = "email") -> list:
        """Run the dork search for ``asset`` and return a list of hit dicts.

        Uses the orchestrator's async dorking engine; when aiohttp cannot be
        imported it falls back to the orchestrator's blocking ``dork`` call in
        an executor. Any other failure is logged at debug level and yields [].
        """
        try:
            import aiohttp as _aio  # type: ignore
            import ssl as _ssl
            connector = _aio.TCPConnector(limit=10, ssl=_ssl.create_default_context(), family=0)
            async with _aio.ClientSession(connector=connector) as session:
                recs = await self._orc.dorking_engine.async_search(session, asset, qtype)

            hits = []
            for r in recs:
                has_raw = hasattr(r, "raw_data")
                hits.append({
                    "url": r.raw_data.get("url", "") if has_raw else "",
                    # The engine result carries no title; the URL (or the dork) stands in.
                    "title": r.raw_data.get("url", r.raw_data.get("dork", "")) if has_raw else "",
                    "snippet": "",
                    "dork": r.raw_data.get("dork", "") if has_raw else "",
                    "engine": "DDG",
                })
            return hits
        except ImportError:
            loop = asyncio.get_running_loop()
            result = await loop.run_in_executor(None, self._orc.dork, asset)
            return result if isinstance(result, list) else []
        except Exception as exc:
            _syslog.debug("DORK_ERR asset=%s err=%s", asset, exc)
            return []

    # ── Scrape dispatcher ─────────────────────────────────────────────

    async def _async_scrape(self, asset: str) -> dict:
        """Run the blocking scrape for ``asset`` in an executor.

        Returns the engine's result dict, or an empty result structure when
        the result is not a dict or scraping fails entirely.
        """
        # A3: instantiate a fresh Session + ScrapeEngine per call to avoid sharing
        # a non-thread-safe requests.Session / cloudscraper across concurrent coroutines.
        _empty: dict = {"pastes": [], "credentials": [], "hashes": [],
                        "telegram": [], "dork_misconfigs": []}
        try:
            loop = asyncio.get_running_loop()
            try:
                from nox import Session, NoxConfig, ScrapeEngine  # type: ignore
                _cfg = getattr(self._orc, "config", None) or NoxConfig()
                _engine = ScrapeEngine(Session(_cfg), self._orc.db)
                qtype = "email"
                try:
                    from nox import Detect  # type: ignore
                    qtype = Detect.qtype(asset)
                except Exception:
                    pass  # keep the "email" default when detection is unavailable
                result = await loop.run_in_executor(None, _engine.run, asset, qtype)
            except Exception:
                # Dedicated engine unavailable or failed: use the orchestrator's own scrape.
                result = await loop.run_in_executor(None, self._orc.scrape, asset)
            return result if isinstance(result, dict) else _empty
        except Exception as exc:
            _syslog.debug("SCRAPE_ERR asset=%s err=%s", asset, exc)
            return _empty
|
||||
|
||||
|
||||
# ── Hash crack helper ──────────────────────────────────────────────────────
|
||||
|
||||
async def _crack_and_inject(session, hash_value: str, record_ref,
                            seen_assets: Set[str], all_records: list,
                            scanner: "AvalancheScanner",
                            depth: int, parent_asset: str,
                            cracked_out: List[str]) -> None:
    """Try to crack ``hash_value``; on success update the record and pivot.

    Args:
        session: HTTP session handed through to ``async_crack``.
        hash_value: Hash to crack; ignored when its type cannot be detected.
        record_ref: Breach record updated in place with ``password``,
            ``hash_type`` and a ``"Cracked"`` entry in ``data_types``.
        seen_assets: Scanner's set of already-claimed asset keys.
        all_records: Not used by this function.
        scanner: Scanner whose ``_process`` receives the plaintext.
        depth: Depth of the asset whose record held the hash.
        parent_asset: That asset, recorded as the plaintext's parent.
        cracked_out: Output list; the plaintext is appended on success.
    """
    from sources.helpers.cracker import detect_hash, async_crack, CRACK_TIMEOUT  # type: ignore
    hash_type = detect_hash(hash_value)
    if not hash_type:
        return
    try:
        plaintext = await asyncio.wait_for(
            async_crack(session, hash_value, hash_type), timeout=CRACK_TIMEOUT)
    except Exception as exc:  # includes asyncio.TimeoutError
        _syslog.debug("CRACK_FAIL hash=%s reason=%s", hash_value[:16], exc)
        return

    if not plaintext:
        _syslog.debug("CRACK_FAIL hash=%s reason=no_result", hash_value[:16])
        return

    record_ref.password = plaintext
    record_ref.hash_type = hash_type
    # data_types may be None or missing; normalise before testing and extending it.
    data_types = list(getattr(record_ref, "data_types", None) or [])
    if "Cracked" not in data_types:
        record_ref.data_types = data_types + ["Cracked"]
    _syslog.info("CRACK_OK hash=%s plain=%s parent=%s", hash_value[:16], plaintext, parent_asset)
    _out("ok", f" [crack] {hash_value[:16]}… → {plaintext} (from {parent_asset})")
    cracked_out.append(plaintext)

    # A4: pivot on the cracked plaintext one level deeper.
    # NOTE(review): the original A4 note says the plaintext is injected as
    # qtype="password", but no qtype is passed here — _process receives the
    # bare plaintext and _do_process derives the type via Detect.qtype().
    # Confirm that this is the intended handling.
    # The key is normalised the same way _process normalises its dedup key.
    key = plaintext.lower().strip()
    if key and key not in seen_assets and depth + 1 <= _cfg_depth(scanner._orc):
        await scanner._process(plaintext, depth + 1,
                               parent=parent_asset, found_in="hash_crack")
|
||||
Reference in New Issue
Block a user