NOX Framework v1.0.0

2026-06-08 16:07:17 +00:00 · 2026-04-07 10:17:43 +02:00
commit 913e764133
163 changed files with 15613 additions and 0 deletions
@@ -0,0 +1,243 @@
+"""
+sources/helpers/config_handler.py — NOX Framework
+Unified credential management via ~/.config/nox-cli/apikeys.json (XDG).
+
+Priority: environment variable → apikeys.json → None
+"""
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+from typing import Dict, Optional
+
+# ── Shared constant — import this everywhere instead of a raw string ───
+UNIVERSAL_PLACEHOLDER = "INSERT_API_KEY_HERE"
+
+# ── XDG config path ────────────────────────────────────────────────────
+_CONFIG_DIR   = Path(os.environ.get("XDG_CONFIG_HOME", Path.home() / ".config")) / "nox-cli"
+_APIKEYS_FILE = _CONFIG_DIR / "apikeys.json"
+
+# ── Complete service registry ──────────────────────────────────────────
+# Format: key_name → {"display": str, "public": bool}
+# public=True  → no key needed, always active
+# public=False → requires a real API key (goes into apikeys.json)
+SERVICE_REGISTRY: Dict[str, Dict] = {
+    # ── Public / keyless ──────────────────────────────────────────────
+    "alienvault_otx_domain":   {"display": "AlienVault OTX (Domain)",   "public": True},
+    "alienvault_otx_ip":       {"display": "AlienVault OTX (IP)",        "public": True},
+    "alienvault_otx_malware":  {"display": "AlienVault OTX (Malware)",   "public": True},
+    "alienvault_otx_user":     {"display": "AlienVault OTX (User)",      "public": True},
+    "anubis_subdomains":       {"display": "Anubis Subdomains",          "public": True},
+    "bgpview_ip":              {"display": "BGPView IP",                 "public": True},
+    "checkleaked":             {"display": "CheckLeaked",                "public": True},
+    "crt_sh":                  {"display": "crt.sh",                     "public": True},
+    "cve_search":              {"display": "CVE Search",                 "public": True},
+    "cxsecurity":              {"display": "CXSecurity",                 "public": True},
+    "duckduckgo_api":          {"display": "Google / DDG Dorks",         "public": True},
+    "emailrep_io":             {"display": "EmailRep.io",                "public": True},
+    "github_users":            {"display": "GitHub Users",               "public": True},
+    "gitlab_search":           {"display": "GitLab Search",              "public": True},
+    "gravatar":                {"display": "Gravatar",                   "public": True},
+    "hackernews_user":         {"display": "HackerNews User",            "public": True},
+    "hackertarget_dnslookup":  {"display": "HackerTarget DNS Lookup",    "public": True},
+    "hackertarget_hostsearch": {"display": "HackerTarget Host Search",   "public": True},
+    "hackertarget_reverseip":  {"display": "HackerTarget Reverse IP",    "public": True},
+    "hackertarget_whois":      {"display": "WHOIS (HackerTarget)",       "public": True},
+    "hudsonrock_osint":        {"display": "HudsonRock OSINT",           "public": True},
+    "ipapi_co":                {"display": "ipapi.co",                   "public": True},
+    "ipinfo_io":               {"display": "IPInfo.io",                  "public": True},
+    "ipvigilante":             {"display": "IPVigilante",                "public": True},
+    "keybase_lookup":          {"display": "Keybase Lookup",             "public": True},
+    "keybase_proofs":          {"display": "Keybase Proofs",             "public": True},
+    "maltiverse_ip":           {"display": "Maltiverse IP",              "public": True},
+    "npm_user":                {"display": "NPM User",                   "public": True},
+    "packetstorm":             {"display": "PacketStorm",                "public": True},
+    "phishtank_check":         {"display": "PhishTank",                  "public": True},
+    "pulsedive":               {"display": "Pulsedive (Free)",           "public": True},
+    "pypi_user":               {"display": "PyPI User",                  "public": True},
+    "reddit_user":             {"display": "Reddit User",                "public": True},
+    "robtex_ip":               {"display": "Robtex IP",                  "public": True},
+    "scamwatcher":             {"display": "ScamWatcher",                "public": True},
+    "social_scan":             {"display": "Social Scan",                "public": True},
+    "sublist3r_api":           {"display": "Sublist3r API",              "public": True},
+    "threatcrowd_domain":      {"display": "ThreatCrowd (Domain)",       "public": True},
+    "threatcrowd_email":       {"display": "ThreatCrowd (Email)",        "public": True},
+    "threatminer_domain":      {"display": "ThreatMiner (Domain)",       "public": True},
+    "threatminer_ip":          {"display": "ThreatMiner (IP)",           "public": True},
+    "urlscan_search":          {"display": "URLScan.io",                 "public": True},
+    "vigilante_pw":            {"display": "Vigilante.pw",               "public": True},
+    "wayback_machine":         {"display": "Wayback Machine",            "public": True},
+    # ── Private / key-required ────────────────────────────────────────
+    "ABSTRACT_API_KEY":         {"display": "Abstract Email Validation", "public": False},
+    "ABUSEIPDB_API_KEY":        {"display": "AbuseIPDB",                 "public": False},
+    "ANYRUN_API_KEY":           {"display": "Any.run",                   "public": False},
+    "BA_API_KEY":               {"display": "BreachAware",               "public": False},
+    "BD_API_KEY":               {"display": "BreachDirectory",           "public": False},
+    "BINARYEDGE_API_KEY":       {"display": "BinaryEdge",                "public": False},
+    "BING_API_KEY":             {"display": "Bing Search API",           "public": False},
+    "CENSYS_AUTH_BASE64":       {"display": "Censys",                    "public": False},
+    "CIRCL_AUTH_BASE64":        {"display": "CIRCL.lu PDNS",             "public": False},
+    "CIT0DAY_API_KEY":          {"display": "Cit0day",                   "public": False},
+    "CLEARBIT_API_KEY":         {"display": "Clearbit Enrich",           "public": False},
+    "CRIMINALIP_API_KEY":       {"display": "CriminalIP",                "public": False},
+    "DEHASHED_AUTH_BASE64":     {"display": "Dehashed",                  "public": False},
+    "DNSDB_API_KEY":            {"display": "DNSDB Passive DNS",         "public": False},
+    "DT_AUTH_BASE64":           {"display": "DomainTools WHOIS",         "public": False},
+    "EXTREME_API_KEY":          {"display": "Extreme IP Lookup",         "public": False},
+    "FLP_API_KEY":              {"display": "FraudLabsPro",              "public": False},
+    "FOFA_API_KEY":             {"display": "FOFA",                      "public": False},
+    "FOFA_EMAIL":              {"display": "FOFA (account email)",      "public": False},
+    "FULLCONTACT_API_KEY":      {"display": "FullContact",               "public": False},
+    "GITHUB_TOKEN":             {"display": "GitHub (Code/Repo Search)", "public": False},
+    "GOOGLE_API_KEY":           {"display": "Google Safe Browsing",      "public": False},
+    "GOOGLE_CX_KEY":            {"display": "Google Custom Search (API key)", "public": False},
+    "GOOGLE_CX_ID":            {"display": "Google Custom Search (CX ID)",   "public": False},
+    "GREYNOISE_API_KEY":        {"display": "GreyNoise",                 "public": False},
+    "HASHES_API_KEY":           {"display": "Hashes.org",                "public": False},
+    "HIBP_API_KEY":             {"display": "HaveIBeenPwned",            "public": False},
+    "HIPPO_API_KEY":            {"display": "EmailHippo",                "public": False},
+    "HUNTER_API_KEY":           {"display": "Hunter.io",                 "public": False},
+    "HYBRID_API_KEY":           {"display": "Hybrid Analysis",           "public": False},
+    "INTELX_API_KEY":           {"display": "IntelX",                    "public": False},
+    "INTEZER_API_KEY":          {"display": "Intezer",                   "public": False},
+    "IPDATA_API_KEY":           {"display": "IPData.co",                 "public": False},
+    "IPGEO_API_KEY":            {"display": "IPGeolocation.io",          "public": False},
+    "IPINFODB_API_KEY":         {"display": "IPInfoDB",                  "public": False},
+    "IPQS_API_KEY":             {"display": "IPQualityScore",            "public": False},
+    "IPSTACK_API_KEY":          {"display": "IPStack",                   "public": False},
+    "JOE_API_KEY":              {"display": "Joe Sandbox",               "public": False},
+    "LEAKCHECK_API_KEY":        {"display": "LeakCheck",                 "public": False},
+    "LEAKIX_API_KEY":           {"display": "LeakIX",                    "public": False},
+    "LEAKSTATS_API_KEY":        {"display": "LeakStats.pw",              "public": False},
+    "MAILBOX_API_KEY":          {"display": "Mailboxlayer",              "public": False},
+    "MALSHARE_API_KEY":         {"display": "MalShare",                  "public": False},
+    "METADEFENDER_API_KEY":     {"display": "MetaDefender",              "public": False},
+    "MISP_API_KEY":             {"display": "MISP",                      "public": False},
+    "NUMVERIFY_API_KEY":        {"display": "Numverify",                 "public": False},
+    "ONYPHE_API_KEY":           {"display": "Onyphe",                    "public": False},
+    "PASSIVETOTAL_AUTH_BASE64": {"display": "PassiveTotal / RiskIQ",     "public": False},
+    "PIPL_API_KEY":             {"display": "Pipl",                      "public": False},
+    "PULSEDIVE_API_KEY":        {"display": "Pulsedive (Premium)",       "public": False},
+    "RF_TOKEN":                 {"display": "Recorded Future",           "public": False},
+    "SECURITYTRAILS_API_KEY":   {"display": "SecurityTrails",            "public": False},
+    "SHODAN_API_KEY":           {"display": "Shodan",                    "public": False},
+    "SNUSBASE_API_KEY":         {"display": "Snusbase",                  "public": False},
+    "SPYCLOUD_API_KEY":         {"display": "SpyCloud",                  "public": False},
+    "SPYONWEB_API_KEY":         {"display": "SpyOnWeb",                  "public": False},
+    "SPYSE_API_KEY":            {"display": "Spyse",                     "public": False},
+    "TC_API_KEY":               {"display": "ThreatConnect",             "public": False},
+    "TINES_API_KEY":            {"display": "Tines Breach",              "public": False},
+    "TP_API_KEY":               {"display": "ThreatPortal",              "public": False},
+    "TWITTER_BEARER_TOKEN":     {"display": "Twitter / X API v2",        "public": False},
+    "URLVOID_API_KEY":          {"display": "URLVoid",                   "public": False},
+    "VIEWDNS_API_KEY":          {"display": "ViewDNS",                   "public": False},
+    "VIRUSTOTAL_API_KEY":       {"display": "VirusTotal",                "public": False},
+    "VULNERS_API_KEY":          {"display": "Vulners",                   "public": False},
+    "WF_API_KEY":               {"display": "WhoisFreaks",               "public": False},
+    "WHOISXML_API_KEY":         {"display": "WhoisXML API",              "public": False},
+    "WHOXY_API_KEY":            {"display": "Whoxy WHOIS",               "public": False},
+    "ZEROBOUNCE_API_KEY":       {"display": "ZeroBounce",                "public": False},
+    "ZOOMEYE_API_KEY":          {"display": "ZoomEye",                   "public": False},
+}
+
+_PRIVATE_KEYS = {k: v for k, v in SERVICE_REGISTRY.items() if not v["public"]}
+
+
+# ── Store helpers ──────────────────────────────────────────────────────
+
+def _default_store() -> Dict[str, str]:
+    """Return a dict of all private service keys set to UNIVERSAL_PLACEHOLDER."""
+    return {k: UNIVERSAL_PLACEHOLDER for k in _PRIVATE_KEYS}
+
+
+def _write_store(data: Dict[str, str]) -> None:
+    """Atomically write data to apikeys.json with chmod 0600."""
+    try:
+        _CONFIG_DIR.mkdir(mode=0o700, parents=True, exist_ok=True)
+        _CONFIG_DIR.chmod(0o700)
+        tmp = _APIKEYS_FILE.with_suffix(".tmp")
+        tmp.write_text(json.dumps(data, indent=4, sort_keys=True), encoding="utf-8")
+        tmp.replace(_APIKEYS_FILE)
+        _APIKEYS_FILE.chmod(0o600)
+    except PermissionError as exc:
+        raise RuntimeError(f"[config_handler] Cannot write {_APIKEYS_FILE}: {exc}") from exc
+
+
+def _load_store() -> Dict[str, str]:
+    """Load apikeys.json, creating it with defaults if absent. Self-heals on corrupt files."""
+    _CONFIG_DIR.mkdir(mode=0o700, parents=True, exist_ok=True)
+    _CONFIG_DIR.chmod(0o700)
+    if not _APIKEYS_FILE.exists():
+        print("  \033[92m[+]\033[0m Initializing NOX Environment in ~/.config/nox-cli/")
+        _write_store(_default_store())
+        return _default_store()
+    try:
+        text = _APIKEYS_FILE.read_text(encoding="utf-8").strip()
+        if not text:
+            raise json.JSONDecodeError("Empty file", "", 0)
+        data = json.loads(text)
+        if not isinstance(data, dict):
+            raise json.JSONDecodeError("Root is not a JSON object", text, 0)
+        # Back-fill keys added in newer versions
+        new_keys = {k: UNIVERSAL_PLACEHOLDER for k in _PRIVATE_KEYS if k not in data}
+        if new_keys:
+            data.update(new_keys)
+            _write_store(data)
+        return data
+    except json.JSONDecodeError:
+        bak = _APIKEYS_FILE.with_suffix(".json.bak")
+        _APIKEYS_FILE.rename(bak)
+        print(f"[!] Malformed apikeys.json detected — backed up to {bak.name} and reset to defaults.")
+        defaults = _default_store()
+        _write_store(defaults)
+        return defaults
+    except PermissionError as exc:
+        raise RuntimeError(f"[config_handler] Cannot read {_APIKEYS_FILE}: {exc}") from exc
+
+
+# ── ConfigManager ──────────────────────────────────────────────────────
+
+class ConfigManager:
+    """
+    Unified API key manager.
+
+    Resolution order per key:
+      1. Environment variable (exact key name)
+      2. ~/.config/nox-cli/apikeys.json
+      3. Returns None if value equals UNIVERSAL_PLACEHOLDER or is absent
+    """
+
+    _cache: Dict[str, Optional[str]] = {}
+    _store: Optional[Dict[str, str]] = None
+
+    @classmethod
+    def _get_store(cls) -> Dict[str, str]:
+        if cls._store is None:
+            cls._store = _load_store()
+        return cls._store
+
+    @classmethod
+    def get_key(cls, key_name: str) -> Optional[str]:
+        """Return the configured value, or None if missing/placeholder."""
+        if key_name in cls._cache:
+            return cls._cache[key_name]
+        val = os.environ.get(key_name, "") or cls._get_store().get(key_name, "")
+        result = None if (not val or val == UNIVERSAL_PLACEHOLDER) else val
+        cls._cache[key_name] = result
+        return result
+
+    # Backward-compatible alias used by nox.py internals
+    get = get_key
+
+    @classmethod
+    def set(cls, key_name: str, value: str) -> None:
+        """Persist a key to apikeys.json and update the in-memory cache."""
+        store = cls._get_store()
+        store[key_name] = value
+        _write_store(store)
+        cls._cache[key_name] = None if value == UNIVERSAL_PLACEHOLDER else value
+
+    @classmethod
+    def config_path(cls) -> Path:
+        return _APIKEYS_FILE
@@ -0,0 +1,119 @@
+"""
+sources/helpers/cracker.py
+Resilient async hash cracker for NOX autoscan.
+
+Detects MD5 / SHA1 / SHA256 / bcrypt hashes inside breach records,
+fires background crack attempts against available APIs, and returns
+results without ever blocking the main pivot pipeline.
+"""
+
+import asyncio
+import logging
+import re
+from typing import List, Optional, Tuple
+
+# C2: MD5 and NTLM share the same 32-char hex pattern.
+# We list md5 first (most common in breach data) but also accept ntlm
+# so callers can query NTLM-specific APIs when needed.
+_PATTERNS: List[Tuple[str, re.Pattern]] = [
+    ("bcrypt",  re.compile(r"^\$2[aby]?\$\d{2}\$.{53}$")),
+    ("sha256",  re.compile(r"^[a-f0-9]{64}$", re.I)),
+    ("sha1",    re.compile(r"^[a-f0-9]{40}$", re.I)),
+    ("md5",     re.compile(r"^[a-f0-9]{32}$", re.I)),
+    # ntlm shares the 32-char hex pattern — detected as md5 first,
+    # but async_crack queries both md5 and ntlm APIs for 32-char hashes.
+]
+
+# Writes to ~/.config/nox-cli/logs/nox_system.log — never to terminal
+_syslog = logging.getLogger("nox.system")
+
+# Per-API timeout — each individual rainbow-table query budget
+_API_TIMEOUT = 8
+# Global crack budget — hard cap regardless of API count or response order
+CRACK_TIMEOUT = 20
+
+
+def detect_hash(value: str) -> Optional[str]:
+    """Return hash type string if value matches a known hash pattern, else None."""
+    v = value.strip()
+    for htype, pat in _PATTERNS:
+        if pat.match(v):
+            return htype
+    return None
+
+
+async def _query_api(session, url: str, fmt: str) -> Optional[str]:
+    """Single API query — returns plaintext or None. Never raises."""
+    try:
+        import aiohttp
+        to = aiohttp.ClientTimeout(total=_API_TIMEOUT)
+        async with session.get(url, timeout=to) as resp:
+            if resp.status != 200:
+                return None
+            if fmt == "text":
+                text = (await resp.text()).strip()
+                # Reject empty, too-long, or obvious error responses
+                if not text or len(text) > 128:
+                    return None
+                tl = text.lower()
+                if any(tl.startswith(p) for p in ("not found", "error", "invalid", "no result", "not in", "cmd5-error", "not exist", "code erreur", "erreur", "unknown")):
+                    return None
+                return text
+            data = await resp.json(content_type=None)
+            return data.get("result") or data.get("plaintext") or data.get("plain") or None
+    except Exception:
+        return None
+
+
+async def async_crack(session, hash_value: str, hash_type: str) -> Optional[str]:
+    """
+    Query multiple rainbow-table APIs concurrently.
+    Returns first plaintext found, or None. bcrypt is skipped.
+
+    C1: create tasks upfront for cancellation, but await each via asyncio.shield
+    inside as_completed — no double wait_for wrapping.
+    C2: for 32-char hex (md5/ntlm ambiguity), also query NTLM-specific APIs.
+
+    Per-API timeout: 8s. Global budget: 20s (CRACK_TIMEOUT).
+    All tasks are cancelled as soon as the first result is found.
+    """
+    if hash_type == "bcrypt":
+        return None
+
+    h = hash_value.strip().lower()
+    apis = [
+        (f"https://www.nitrxgen.net/md5db/{h}",                                    "text"),
+        (f"https://hashes.com/en/api/hash?hash={h}",                               "json"),
+        (f"https://hash.help/api/lookup/{h}",                                       "json"),
+        (f"https://hashkiller.io/api/search.php?hash={h}",                         "json"),
+        (f"https://md5decrypt.net/Api/api.php?hash={h}&hash_type={hash_type}&email=&code=", "text"),
+        (f"https://www.cmd5.org/api.ashx?hash={h}",                                "text"),
+    ]
+    # C2: for 32-char hashes (md5/ntlm ambiguous), add NTLM-specific endpoint
+    if hash_type == "md5" and len(h) == 32:
+        apis.append((f"https://hashes.com/en/api/hash?hash={h}&type=ntlm", "json"))
+
+    # C1: create tasks so we can cancel them; shield each before passing to wait_for
+    # so cancellation of the shield future does not cancel the underlying task prematurely.
+    tasks = [asyncio.create_task(_query_api(session, url, fmt)) for url, fmt in apis]
+    result: Optional[str] = None
+    try:
+        for fut in asyncio.as_completed(tasks):
+            try:
+                res = await asyncio.wait_for(asyncio.shield(fut), timeout=_API_TIMEOUT)
+            except (asyncio.TimeoutError, asyncio.CancelledError):
+                continue
+            except Exception:
+                continue
+            if res:
+                result = res
+                break
+    except Exception:
+        pass
+    finally:
+        # Cancel all remaining tasks and await to suppress pending-task warnings
+        for t in tasks:
+            if not t.done():
+                t.cancel()
+        await asyncio.gather(*[t for t in tasks if not t.done()], return_exceptions=True)
+    return result
@@ -0,0 +1,658 @@
+"""
+sources/helpers/reporting.py
+NOX Enterprise Reporting — Executive Summary, Pivot Chain, Data Sanitization.
+"""
+
+import hashlib
+import html as _html
+import json
+import re
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, List
+
+# ── Noise patterns stripped from all report output ────────────────────
+_NOISE_RE = re.compile(
+    r"(Traceback \(most recent|File \".*\.py\"|TimeoutError|ProxyError"
+    r"|ConnectionError|aiohttp\.|ClientConnector|ssl\.|asyncio\."
+    r"|Task exception|NoneType|Object of type)",
+    re.I,
+)
+_CTRL_RE = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x9f]")
+
+
+def _nox_ver() -> str:
+    try:
+        from nox import VERSION  # type: ignore
+        return VERSION
+    except ImportError:
+        return "1.0.0"
+
+
+def _clean(v: Any, maxlen: int = 200) -> str:
+    """Strip control chars, technical noise, HTML-escape, truncate."""
+    s = str(v) if v is not None else ""
+    s = _CTRL_RE.sub("", s)
+    if _NOISE_RE.search(s):
+        return ""
+    return _html.escape(s[:maxlen])
+
+
+def _raw(v: Any, maxlen: int = 200) -> str:
+    """Strip control chars only — no HTML escaping (PDF / plain-text paths)."""
+    s = str(v) if v is not None else ""
+    s = _CTRL_RE.sub("", s)
+    if _NOISE_RE.search(s):
+        return ""
+    return s[:maxlen]
+
+
+def _pdf_safe(s: str, maxlen: int = 180) -> str:
+    # D4: sanitize for fpdf2 core fonts (latin-1 subset).
+    # NFKD normalization decomposes accented chars (é→e + combining accent)
+    # so common accented Latin characters survive as their base letter.
+    # Truly non-latin-1 chars (Cyrillic, CJK, etc.) become '?' — intentional:
+    # fpdf2 core fonts cannot render them and would raise UnicodeEncodeError.
+    s = _raw(s, maxlen)
+    try:
+        import unicodedata
+        normalized = unicodedata.normalize("NFKD", s)
+        return normalized.encode("ascii", errors="replace").decode("ascii")
+    except Exception:
+        return s.encode("latin-1", errors="replace").decode("latin-1")
+
+
+def _rget(r: Any, k: str) -> str:
+    if isinstance(r, dict):
+        return str(r.get(k, "") or "")
+    return str(getattr(r, k, "") or "")
+
+
+# ── Executive summary builder ─────────────────────────────────────────
+
+def build_exec_summary(data: dict) -> dict:
+    """
+    Returns a dict with all dashboard KPIs needed by every format.
+    Expects data keys: records, analysis, scan_meta (optional).
+    """
+    records   = data.get("records", [])
+    meta      = data.get("scan_meta", {}) or {}
+    analysis  = data.get("analysis", {}) or {}
+
+    cleartext = sum(1 for r in records if _rget(r, "password"))
+    nodes     = len({_rget(r, "email") or _rget(r, "username") for r in records} - {""})
+    elapsed   = meta.get("elapsed_seconds")
+    depth     = meta.get("pivot_depth", len(data.get("pivot_chain", [])))
+
+    buckets: Dict[str, int] = {"Critical": 0, "High": 0, "Medium": 0, "Low": 0, "Info": 0}
+    for r in records:
+        rs = float(_rget(r, "risk_score") or 0)
+        if rs >= 90:   buckets["Critical"] += 1
+        elif rs >= 70: buckets["High"]     += 1
+        elif rs >= 40: buckets["Medium"]   += 1
+        elif rs >= 10: buckets["Low"]      += 1
+        else:          buckets["Info"]     += 1
+
+    return {
+        "total_records":    len(records),
+        "nodes_discovered": nodes,
+        "cleartext_passwords": cleartext,
+        "pivot_depth":      depth,
+        "elapsed":          f"{elapsed:.1f}s" if elapsed is not None else "N/A",
+        "buckets":          buckets,
+        "hvt_count":        analysis.get("hvt_count", sum(1 for r in records if getattr(r, "is_hvt", False))),
+    }
+
+
+# ── Pivot chain renderer ──────────────────────────────────────────────
+
+def render_pivot_chain(data: dict) -> List[str]:
+    """
+    Build a human-readable pivot chain.
+    D2: check pivot_log first before falling back to record-based reconstruction.
+    """
+    chain  = data.get("pivot_chain") or []
+    target = _raw(data.get("target", "?"))
+
+    # D2: if pivot_log is available, build chain from it (accurate tree)
+    pivot_log = data.get("pivot_log") or []
+    if pivot_log:
+        lines: List[str] = []
+        for e in pivot_log:
+            depth  = e.get("depth", 0)
+            asset  = _raw(e.get("asset", ""))
+            phase  = _raw(e.get("found_in", e.get("source", "?")))
+            parent = _raw(e.get("parent") or "")
+            prefix = "  " * depth
+            if depth == 0:
+                lines.append(f"[SEED] {asset}")
+            else:
+                lines.append(f"{prefix}└─ [{phase}] {asset}  ← {parent}")
+        return lines if lines else [f"[SEED] {target}  (no pivot data)"]
+
+    if len(chain) <= 1:
+        # No pivot data — reconstruct best-effort from records
+        records = data.get("records", [])
+        lines = [f"[SEED] {target}"]
+        seen: set = {target.lower()}
+        for r in records[:40]:
+            src   = _raw(_rget(r, "source"))
+            em    = _raw(_rget(r, "email"))
+            usr   = _raw(_rget(r, "username"))
+            ident = em or usr
+            if not ident or ident.lower() in seen:
+                continue
+            seen.add(ident.lower())
+            lines.append(f"  └─ [{src}] → {ident}")
+        dork_results = data.get("dork_results") or []
+        for d in dork_results[:5]:
+            url = _raw(d.get("url", ""))
+            if url and url.lower() not in seen:
+                seen.add(url.lower())
+                lines.append(f"  └─ [Dork] → {url[:80]}")
+        return lines if len(lines) > 1 else [f"[SEED] {target}  (no pivot data)"]
+
+    # Ordered pivot chain from AvalancheScanner
+    lines = [f"[SEED] {_raw(chain[0])}"]
+    for node in chain[1:]:
+        lines.append(f"  └─ [Pivot] → {_raw(node)}")
+    return lines
+
+
+# ── JSON report ───────────────────────────────────────────────────────
+
+def to_json(data: dict, path: str) -> None:
+    summary = build_exec_summary(data)
+    chain   = render_pivot_chain(data)
+    records = data.get("records", [])
+
+    def _ser(o):
+        try:
+            from enum import Enum
+            if isinstance(o, Enum):
+                return o.name
+        except ImportError:
+            pass
+        if hasattr(o, "to_dict"):
+            return o.to_dict()
+        return str(o)
+
+    clean_records = []
+    for r in records:
+        d = r.to_dict() if hasattr(r, "to_dict") else (r if isinstance(r, dict) else {})
+        # drop noise fields
+        clean_records.append({
+            k: v for k, v in d.items()
+            if k not in ("raw_data", "metadata") and not _NOISE_RE.search(str(v or ""))
+        })
+
+    try:
+        from nox import VERSION as _NOX_VERSION  # type: ignore
+    except ImportError:
+        _NOX_VERSION = "1.0.0"
+
+    # Include dork and scrape results in JSON output
+    dork_results   = data.get("dork_results", []) or []
+    scrape_results = data.get("scrape_results", {}) or {}
+
+    # D3: apply consistent cap (1000) — same as HTML
+    _RECORD_CAP = 1000
+
+    out_data = {
+        "framework":       f"NOX v{_NOX_VERSION}",
+        "generated":       datetime.now().isoformat(),
+        "target":          data.get("target", ""),
+        # J3: self-describing metadata block
+        "_meta": {
+            "scan_id":        hashlib.sha256(
+                f"{data.get('target','')}{datetime.now().isoformat()}".encode()
+            ).hexdigest()[:16],
+            "target":         data.get("target", ""),
+            "timestamp":      datetime.now().isoformat(),
+            "nox_version":    _NOX_VERSION,
+            "sources_queried": summary.get("total_records", 0),
+            "pivot_depth_reached": summary.get("pivot_depth", 0),
+            "record_cap":     _RECORD_CAP,
+            "truncated":      len(clean_records) > _RECORD_CAP,
+        },
+        "executive_summary": summary,
+        "pivot_chain":     chain,
+        "records":         clean_records[:_RECORD_CAP],
+        "dork_results":    dork_results,
+        "scrape_results":  scrape_results,
+    }
+    Path(path).write_text(json.dumps(out_data, indent=2, default=_ser), encoding="utf-8")
+    print(f"[+] JSON report saved: {path}")
+
+
+# ── HTML report ───────────────────────────────────────────────────────
+
+_CSS = (
+    "*{margin:0;padding:0;box-sizing:border-box}"
+    "body{font-family:'Courier New',monospace;background:#0a0a0a;color:#e0e0e0;padding:20px}"
+    ".hdr{text-align:center;padding:28px;border:1px solid #333;margin-bottom:18px;background:#111}"
+    ".hdr h1{color:#00ff41;font-size:26px;letter-spacing:4px}"
+    ".hdr p{color:#888;margin-top:5px;font-size:12px}"
+    ".kpis{display:grid;grid-template-columns:repeat(auto-fit,minmax(160px,1fr));gap:10px;margin:14px 0}"
+    ".kpi{background:#111;border:1px solid #333;padding:16px;text-align:center}"
+    ".kpi .n{font-size:30px;font-weight:bold;color:#00ff41}"
+    ".kpi .l{color:#888;font-size:10px;margin-top:3px}"
+    ".kpi.warn .n{color:#ff6600} .kpi.crit .n{color:#ff0040}"
+    ".sec{margin:18px 0} .sec h2{color:#00ff41;border-bottom:1px solid #333;padding-bottom:5px;margin-bottom:10px}"
+    ".chain{background:#0d1a0d;border:1px solid #1a3a1a;padding:12px;font-size:11px;color:#00cc33;word-break:break-all;margin:8px 0}"
+    "table{width:100%;border-collapse:collapse} th,td{padding:7px;border:1px solid #222;font-size:11px;word-break:break-all}"
+    "th{background:#1a1a1a;color:#00ff41;text-transform:uppercase;font-size:10px} td{background:#0d0d0d}"
+    "tr.c td{background:#1a0005} tr.h td{background:#1a0a00} tr.m td{background:#1a1500}"
+    ".pw{color:#ff0040;font-weight:bold}"
+)
+
+
+def to_html(data: dict, path: str) -> None:
+    summary = build_exec_summary(data)
+    chain   = render_pivot_chain(data)
+    target  = _clean(data.get("target", "Unknown"))
+    records = data.get("records", [])
+    ts      = datetime.now().strftime("%Y-%m-%d %H:%M:%S UTC")
+
+    # KPI dashboard
+    kpis = (
+        f'<div class="kpi"><div class="n">{summary["total_records"]}</div><div class="l">TOTAL RECORDS</div></div>'
+        f'<div class="kpi"><div class="n">{summary["nodes_discovered"]}</div><div class="l">NODES DISCOVERED</div></div>'
+        f'<div class="kpi crit"><div class="n">{summary["cleartext_passwords"]}</div><div class="l">CLEARTEXT PASSWORDS</div></div>'
+        f'<div class="kpi warn"><div class="n">{summary["hvt_count"]}</div><div class="l">HIGH-VALUE TARGETS</div></div>'
+        f'<div class="kpi"><div class="n">{summary["pivot_depth"]}</div><div class="l">PIVOT DEPTH</div></div>'
+        f'<div class="kpi"><div class="n">{summary["elapsed"]}</div><div class="l">TOTAL TIME</div></div>'
+    )
+
+    # Severity table
+    sev_rows = "".join(
+        f"<tr><td>{lvl}</td><td>{cnt}</td></tr>"
+        for lvl, cnt in summary["buckets"].items() if cnt
+    )
+
+    # Pivot chain
+    chain_html = "".join(f'<div class="chain">{_clean(c)}</div>' for c in chain)
+
+    # Credential rows (top 500, noise-free)
+    cred_rows = ""
+    for r in records[:500]:
+        rs  = float(_rget(r, "risk_score") or 0)
+        cls = "c" if rs >= 90 else "h" if rs >= 70 else "m" if rs >= 40 else ""
+        em  = _clean(_rget(r, "email") or _rget(r, "username"))
+        pw  = _clean(_rget(r, "password"))
+        src = _clean(_rget(r, "source"))
+        bd  = _clean(_rget(r, "breach_date"))
+        hvt = " ⚑" if getattr(r, "is_hvt", False) or (isinstance(r, dict) and r.get("is_hvt")) else ""
+        cred_rows += (
+            f"<tr class='{cls}'><td>{em}{hvt}</td>"
+            f"<td class='pw'>{pw}</td><td>{src}</td><td>{bd}</td><td>{rs:.0f}</td></tr>"
+        )
+
+    # Dork results section
+    dork_results = data.get("dork_results", []) or []
+    dork_rows = ""
+    for h in dork_results:
+        url     = h.get("url", "")
+        title   = h.get("title", "") or h.get("dork", "")
+        snippet = h.get("snippet", "")
+        engine  = h.get("engine", "")
+        link    = (f'<a href="{_clean(url)}" style="color:#00ff41" target="_blank">{_clean(url[:80])}</a>'
+                   if url else _clean(title[:80]))
+        dork_rows += (
+            f"<tr><td>{link}</td><td>{_clean(snippet[:120])}</td>"
+            f"<td>{_clean(h.get('dork','')[:80])}</td><td>{_clean(engine)}</td></tr>"
+        )
+    dork_section = (
+        f'<div class="sec"><h2>Dork Results ({len(dork_results)} hits)</h2>'
+        f'<table><thead><tr><th>URL / Title</th><th>Snippet</th><th>Dork Query</th><th>Engine</th></tr></thead>'
+        f'<tbody>{dork_rows if dork_rows else "<tr><td colspan=4 style=text-align:center>No dork hits</td></tr>"}</tbody></table></div>'
+    )
+
+    # Scrape results section
+    scrape_results = data.get("scrape_results", {}) or {}
+    pastes   = scrape_results.get("pastes", [])
+    creds_sc = scrape_results.get("credentials", [])
+    tg_hits  = scrape_results.get("telegram", [])
+    mc_hits  = scrape_results.get("dork_misconfigs", [])
+
+    paste_rows = ""
+    for p in pastes:
+        site = _clean(p.get("site", ""))
+        pid  = p.get("id", "")
+        pats = _clean(", ".join(f"{k}({len(v)})" for k, v in (p.get("patterns") or {}).items()))
+        paste_rows += f"<tr><td>{site}</td><td>{_clean(pid)}</td><td>{pats}</td></tr>"
+
+    cred_sc_rows = ""
+    for c in creds_sc:
+        cred_sc_rows += (
+            f"<tr><td class='pw'>{_clean(c.get('raw','')[:120])}</td>"
+            f"<td>{_clean(c.get('source',''))}</td><td>{_clean(c.get('paste_id',''))}</td></tr>"
+        )
+
+    tg_rows = ""
+    for t in tg_hits:
+        ch   = _clean(t.get("channel", ""))
+        text = _clean(t.get("text", "")[:200])
+        pats = _clean(", ".join(f"{k}({len(v)})" for k, v in (t.get("patterns") or {}).items()))
+        link = f'<a href="https://t.me/s/{ch}" style="color:#00ff41" target="_blank">t.me/s/{ch}</a>'
+        tg_rows += f"<tr><td>{link}</td><td>{text}</td><td>{pats}</td></tr>"
+
+    mc_rows = ""
+    for m in mc_hits:
+        url_m   = m.get("url", "")
+        title_m = _clean(m.get("title", "")[:80])
+        dork_m  = _clean(m.get("dork", "")[:80])
+        link_m  = (f'<a href="{_clean(url_m)}" style="color:#ff0040" target="_blank">{_clean(url_m[:80])}</a>'
+                   if url_m else title_m)
+        mc_rows += f"<tr><td>{link_m}</td><td>{title_m}</td><td>{dork_m}</td></tr>"
+
+    scrape_section = (
+        f'<div class="sec"><h2>Scrape Results</h2>'
+        f'<h3 style="color:#aaa;margin:10px 0 5px">Pastes ({len(pastes)})</h3>'
+        f'<table><thead><tr><th>Site</th><th>Paste ID</th><th>Patterns</th></tr></thead>'
+        f'<tbody>{paste_rows or "<tr><td colspan=3 style=text-align:center>None</td></tr>"}</tbody></table>'
+        f'<h3 style="color:#aaa;margin:10px 0 5px">Extracted Credentials ({len(creds_sc)})</h3>'
+        f'<table><thead><tr><th>Raw Credential</th><th>Source</th><th>Paste ID</th></tr></thead>'
+        f'<tbody>{cred_sc_rows or "<tr><td colspan=3 style=text-align:center>None</td></tr>"}</tbody></table>'
+        f'<h3 style="color:#aaa;margin:10px 0 5px">Telegram CTI ({len(tg_hits)})</h3>'
+        f'<table><thead><tr><th>Channel</th><th>Message</th><th>Patterns</th></tr></thead>'
+        f'<tbody>{tg_rows or "<tr><td colspan=3 style=text-align:center>None</td></tr>"}</tbody></table>'
+        f'<h3 style="color:#aaa;margin:10px 0 5px">Misconfigurations ({len(mc_hits)})</h3>'
+        f'<table><thead><tr><th>URL</th><th>Title</th><th>Dork</th></tr></thead>'
+        f'<tbody>{mc_rows or "<tr><td colspan=3 style=text-align:center>None</td></tr>"}</tbody></table>'
+        f'</div>'
+    )
+
+    page = (
+        f'<!DOCTYPE html><html><head><meta charset="utf-8">'
+        f'<title>NOX — {target}</title><style>{_CSS}</style></head><body>'
+        f'<div class="hdr"><h1>[ NOX ]</h1>'
+        f'<p>Target: {target} &nbsp;|&nbsp; {ts} &nbsp;|&nbsp; NOX v{_nox_ver()}</p></div>'
+        f'<div class="sec"><h2>Executive Summary</h2>'
+        f'<div class="kpis">{kpis}</div>'
+        f'<table><thead><tr><th>Severity</th><th>Count</th></tr></thead>'
+        f'<tbody>{sev_rows}</tbody></table></div>'
+        f'<div class="sec"><h2>Pivot Chain</h2>{chain_html}</div>'
+        f'{dork_section}'
+        f'{scrape_section}'
+        f'<div class="sec"><h2>Credential Records (top 500)</h2>'
+        f'<table><thead><tr><th>Identity</th><th>Password</th><th>Source</th>'
+        f'<th>Date</th><th>Risk</th></tr></thead><tbody>{cred_rows}</tbody></table></div>'
+        f'</body></html>'
+    )
+    Path(path).write_text(page, encoding="utf-8")
+    print(f"[+] HTML report saved: {path}")
+
+
+# ── PDF report (fpdf2) ────────────────────────────────────────────────
+
+def to_pdf(data: dict, path: str, investigator_id: str = "NOX-AUTO") -> None:
+    # D1: raise a clear error with install hint if fpdf2 is absent — never silently return.
+    try:
+        from fpdf import FPDF  # type: ignore
+    except ImportError:
+        msg = "[!] fpdf2 not installed — PDF report cannot be generated. Run: pip install fpdf2"
+        print(msg)
+        raise RuntimeError(msg)
+
+    summary = build_exec_summary(data)
+    chain   = render_pivot_chain(data)
+    target  = _raw(data.get("target", "Unknown"))
+    records = data.get("records", [])
+    ts      = datetime.now().strftime("%Y-%m-%d %H:%M:%S UTC")
+
+    class _PDF(FPDF):
+        def header(self):
+            self.set_font("Helvetica", "B", 8)
+            self.set_text_color(120, 120, 120)
+            self.cell(0, 5, "NOX - FORENSIC INTELLIGENCE REPORT - CONFIDENTIAL", align="R")
+            self.ln(3)
+
+        def footer(self):
+            self.set_y(-12)
+            self.set_font("Helvetica", "", 8)
+            self.set_text_color(150, 150, 150)
+            self.cell(0, 5, _pdf_safe(f"Page {self.page_no()} | {target[:50]}"), align="C")
+
+    pdf = _PDF(orientation="P", unit="mm", format="A4")
+    pdf.set_auto_page_break(auto=True, margin=15)
+    pdf.set_margins(15, 15, 15)
+
+    # ── Cover page ────────────────────────────────────────────────────
+    pdf.add_page()
+    pdf.set_fill_color(15, 15, 15)
+    pdf.rect(0, 0, 210, 297, "F")
+    pdf.set_y(65)
+    pdf.set_font("Helvetica", "B", 26)
+    pdf.set_text_color(0, 220, 60)
+    pdf.cell(0, 12, "FORENSIC INTELLIGENCE REPORT", align="C")
+    pdf.ln(8)
+    pdf.set_font("Helvetica", "B", 13)
+    pdf.set_text_color(200, 200, 200)
+    pdf.cell(0, 8, _pdf_safe(f"Target: {target}"), align="C")
+    pdf.ln(6)
+    pdf.set_font("Helvetica", "", 10)
+    pdf.set_text_color(140, 140, 140)
+    for line in [f"Generated: {ts}", f"Investigator: {investigator_id}",
+                 f"Framework: NOX v{_nox_ver()}", "Classification: RESTRICTED"]:
+        pdf.cell(0, 6, _pdf_safe(line), align="C")
+        pdf.ln(5)
+
+    # ── Executive Summary ─────────────────────────────────────────────
+    pdf.add_page()
+    pdf.set_fill_color(255, 255, 255)
+    pdf.set_text_color(0, 0, 0)
+    pdf.set_font("Helvetica", "B", 15)
+    pdf.cell(0, 10, "Executive Summary", ln=True)
+    pdf.set_draw_color(0, 180, 50)
+    pdf.set_line_width(0.4)
+    pdf.line(15, pdf.get_y(), 195, pdf.get_y())
+    pdf.ln(4)
+
+    kpis = [
+        ("Total Time",               summary["elapsed"]),
+        ("Nodes Discovered",         str(summary["nodes_discovered"])),
+        ("Cleartext Passwords Found", str(summary["cleartext_passwords"])),
+        ("Pivot Depth",              str(summary["pivot_depth"])),
+        ("Total Records",            str(summary["total_records"])),
+        ("High-Value Targets",       str(summary["hvt_count"])),
+    ]
+    pdf.set_font("Helvetica", "B", 10)
+    for label, value in kpis:
+        pdf.set_fill_color(245, 245, 245)
+        pdf.cell(95, 7, _pdf_safe(label), border=1, fill=True)
+        pdf.set_font("Helvetica", "", 10)
+        pdf.cell(80, 7, _pdf_safe(value), border=1, ln=True)
+        pdf.set_font("Helvetica", "B", 10)
+    pdf.ln(4)
+
+    # Severity breakdown
+    pdf.set_font("Helvetica", "B", 11)
+    pdf.cell(0, 7, "Severity Breakdown", ln=True)
+    _sev_c = {"Critical": (220,0,30), "High": (220,100,0),
+               "Medium": (200,180,0), "Low": (0,150,50), "Info": (100,100,100)}
+    total_b = max(sum(summary["buckets"].values()), 1)
+    for level, count in summary["buckets"].items():
+        pdf.set_font("Helvetica", "", 9)
+        pdf.cell(35, 6, _pdf_safe(level), border=1)
+        pdf.cell(20, 6, str(count), border=1)
+        bar_w = int(count / total_b * 120)
+        x, y  = pdf.get_x(), pdf.get_y()
+        pdf.cell(125, 6, "", border=1)
+        if bar_w:
+            rc, gc, bc = _sev_c.get(level, (100, 100, 100))
+            pdf.set_fill_color(rc, gc, bc)
+            pdf.rect(x + 1, y + 1, bar_w, 4, "F")
+        pdf.ln()
+
+    # ── Pivot Chain ───────────────────────────────────────────────────
+    pdf.ln(5)
+    pdf.set_font("Helvetica", "B", 11)
+    pdf.cell(0, 7, "Pivot Chain Visualization", ln=True)
+    pdf.line(15, pdf.get_y(), 195, pdf.get_y())
+    pdf.ln(3)
+    pdf.set_font("Courier", "", 8)
+    pdf.set_fill_color(240, 255, 240)
+    for c_line in chain:
+        # Word-wrap long chains at 100 chars
+        for chunk in [c_line[i:i+100] for i in range(0, max(len(c_line), 1), 100)]:
+            pdf.set_x(15)
+            pdf.cell(180, 5, _pdf_safe(chunk), border=0, ln=True, fill=True)
+    pdf.ln(3)
+
+    # ── Credential Findings ───────────────────────────────────────────
+    pdf.add_page()
+    pdf.set_font("Helvetica", "B", 13)
+    pdf.set_text_color(0, 0, 0)
+    pdf.cell(0, 9, "Credential Findings", ln=True)
+    pdf.line(15, pdf.get_y(), 195, pdf.get_y())
+    pdf.ln(3)
+
+    cols = [("Identity", 60), ("Password", 45), ("Source", 35), ("Date", 25), ("Risk", 15)]
+
+    def _write_col_headers():
+        pdf.set_font("Helvetica", "B", 8)
+        pdf.set_fill_color(30, 30, 30)
+        pdf.set_text_color(255, 255, 255)
+        for col_name, col_w in cols:
+            pdf.cell(col_w, 6, col_name, border=1, fill=True)
+        pdf.ln()
+        pdf.set_text_color(0, 0, 0)
+
+    _write_col_headers()
+
+    for r in records[:500]:
+        pw = _rget(r, "password")
+        if not pw and not _rget(r, "email") and not _rget(r, "username"):
+            continue  # skip noise rows with no actionable data
+        rs = float(_rget(r, "risk_score") or 0)
+        if rs >= 90:   pdf.set_fill_color(255, 220, 220)
+        elif rs >= 70: pdf.set_fill_color(255, 240, 220)
+        else:          pdf.set_fill_color(255, 255, 255)
+        pdf.set_font("Helvetica", "", 7)
+        # Auto page-break with repeated column headers (§5.1)
+        if pdf.get_y() > pdf.h - 25:
+            pdf.add_page()
+            _write_col_headers()
+        vals = [
+            _pdf_safe(_rget(r, "email") or _rget(r, "username"), 38),
+            _pdf_safe(pw, 28),
+            _pdf_safe(_rget(r, "source"), 22),
+            _pdf_safe(_rget(r, "breach_date"), 14),
+            f"{rs:.0f}",
+        ]
+        for val, (_, w) in zip(vals, cols):
+            pdf.cell(w, 5, val, border=1, fill=True)
+        pdf.ln()
+
+    # ── Dork Results ─────────────────────────────────────────────────
+    dork_results = data.get("dork_results", []) or []
+    if dork_results:
+        pdf.add_page()
+        pdf.set_font("Helvetica", "B", 13)
+        pdf.set_text_color(0, 0, 0)
+        pdf.cell(0, 9, _pdf_safe(f"Dork Results ({len(dork_results)} hits)"), ln=True)
+        pdf.line(15, pdf.get_y(), 195, pdf.get_y())
+        pdf.ln(3)
+        pdf.set_font("Helvetica", "B", 8)
+        pdf.set_fill_color(30, 30, 30); pdf.set_text_color(255, 255, 255)
+        for col_name, col_w in [("URL / Title", 95), ("Snippet", 55), ("Engine", 30)]:
+            pdf.cell(col_w, 6, col_name, border=1, fill=True)
+        pdf.ln(); pdf.set_text_color(0, 0, 0)
+        for h in dork_results[:200]:
+            pdf.set_fill_color(245, 245, 255); pdf.set_font("Helvetica", "", 7)
+            url     = _pdf_safe(h.get("url", h.get("title", "")), 65)
+            snippet = _pdf_safe(h.get("snippet", ""), 38)
+            engine  = _pdf_safe(h.get("engine", ""), 20)
+            for val, w in zip([url, snippet, engine], [95, 55, 30]):
+                pdf.cell(w, 5, val, border=1, fill=True)
+            pdf.ln()
+
+    # ── Scrape Results ────────────────────────────────────────────────
+    scrape_results = data.get("scrape_results", {}) or {}
+    pastes      = scrape_results.get("pastes", [])
+    creds_sc    = scrape_results.get("credentials", [])
+    tg_hits     = scrape_results.get("telegram", [])
+    mc_hits     = scrape_results.get("dork_misconfigs", [])
+
+    if pastes or creds_sc or tg_hits or mc_hits:
+        pdf.add_page()
+        pdf.set_font("Helvetica", "B", 13)
+        pdf.set_text_color(0, 0, 0)
+        pdf.cell(0, 9, "Scrape Results", ln=True)
+        pdf.line(15, pdf.get_y(), 195, pdf.get_y())
+        pdf.ln(3)
+
+        if pastes:
+            pdf.set_font("Helvetica", "B", 10)
+            pdf.cell(0, 7, _pdf_safe(f"Pastes ({len(pastes)})"), ln=True)
+            pdf.set_font("Helvetica", "B", 8)
+            pdf.set_fill_color(30, 30, 30); pdf.set_text_color(255, 255, 255)
+            for col_name, col_w in [("Site", 25), ("Paste ID", 80), ("Patterns", 75)]:
+                pdf.cell(col_w, 6, col_name, border=1, fill=True)
+            pdf.ln(); pdf.set_text_color(0, 0, 0)
+            for p in pastes[:100]:
+                pdf.set_fill_color(245, 245, 245); pdf.set_font("Helvetica", "", 7)
+                site = _pdf_safe(p.get("site", ""), 15)
+                pid  = _pdf_safe(p.get("id", ""), 55)
+                pats = _pdf_safe(", ".join(f"{k}({len(v)})" for k, v in (p.get("patterns") or {}).items()), 50)
+                for val, w in zip([site, pid, pats], [25, 80, 75]):
+                    pdf.cell(w, 5, val, border=1, fill=True)
+                pdf.ln()
+            pdf.ln(3)
+
+        if creds_sc:
+            pdf.set_font("Helvetica", "B", 10)
+            pdf.cell(0, 7, _pdf_safe(f"Extracted Credentials ({len(creds_sc)})"), ln=True)
+            pdf.set_font("Helvetica", "B", 8)
+            pdf.set_fill_color(30, 30, 30); pdf.set_text_color(255, 255, 255)
+            for col_name, col_w in [("Raw Credential", 120), ("Source", 30), ("Paste ID", 30)]:
+                pdf.cell(col_w, 6, col_name, border=1, fill=True)
+            pdf.ln(); pdf.set_text_color(0, 0, 0)
+            for c in creds_sc[:150]:
+                pdf.set_fill_color(255, 240, 240); pdf.set_font("Helvetica", "", 7)
+                raw = _pdf_safe(c.get("raw", ""), 80)
+                src = _pdf_safe(c.get("source", ""), 20)
+                pid = _pdf_safe(c.get("paste_id", ""), 20)
+                for val, w in zip([raw, src, pid], [120, 30, 30]):
+                    pdf.cell(w, 5, val, border=1, fill=True)
+                pdf.ln()
+            pdf.ln(3)
+
+        if tg_hits:
+            pdf.set_font("Helvetica", "B", 10)
+            pdf.cell(0, 7, _pdf_safe(f"Telegram CTI ({len(tg_hits)})"), ln=True)
+            pdf.set_font("Helvetica", "B", 8)
+            pdf.set_fill_color(30, 30, 30); pdf.set_text_color(255, 255, 255)
+            for col_name, col_w in [("Channel", 50), ("Message Excerpt", 100), ("Patterns", 30)]:
+                pdf.cell(col_w, 6, col_name, border=1, fill=True)
+            pdf.ln(); pdf.set_text_color(0, 0, 0)
+            for t in tg_hits[:80]:
+                pdf.set_fill_color(245, 245, 255); pdf.set_font("Helvetica", "", 7)
+                link = _pdf_safe(f"t.me/s/{t.get('channel','')}", 35)
+                text = _pdf_safe(t.get("text", ""), 70)
+                pats = _pdf_safe(", ".join(f"{k}({len(v)})" for k, v in (t.get("patterns") or {}).items()), 25)
+                for val, w in zip([link, text, pats], [50, 100, 30]):
+                    pdf.cell(w, 5, val, border=1, fill=True)
+                pdf.ln()
+            pdf.ln(3)
+
+        if mc_hits:
+            pdf.set_font("Helvetica", "B", 10)
+            pdf.cell(0, 7, _pdf_safe(f"Misconfigurations ({len(mc_hits)})"), ln=True)
+            pdf.set_font("Helvetica", "B", 8)
+            pdf.set_fill_color(30, 30, 30); pdf.set_text_color(255, 255, 255)
+            for col_name, col_w in [("URL", 90), ("Title", 60), ("Dork", 30)]:
+                pdf.cell(col_w, 6, col_name, border=1, fill=True)
+            pdf.ln(); pdf.set_text_color(0, 0, 0)
+            for m in mc_hits[:80]:
+                pdf.set_fill_color(255, 245, 230); pdf.set_font("Helvetica", "", 7)
+                url_m   = _pdf_safe(m.get("url", ""), 60)
+                title_m = _pdf_safe(m.get("title", ""), 40)
+                dork_m  = _pdf_safe(m.get("dork", ""), 25)
+                for val, w in zip([url_m, title_m, dork_m], [90, 60, 30]):
+                    pdf.cell(w, 5, val, border=1, fill=True)
+                pdf.ln()
+
+    pdf.output(path)
+    print(f"[+] PDF report saved: {path}")
@@ -0,0 +1,525 @@
+"""
+sources/helpers/scanner.py
+Recursive Avalanche Engine for NOX autoscan.
+
+Pipeline per asset (sequential phases):
+  Phase 1 — Breach scan
+  Phase 2 — Hash crack (non-blocking, on breach results)
+  Phase 3 — Dork
+  Phase 4 — Scrape
+  → Harvest new identifiers from all phases
+  → Reinject every new unique identifier (not seen before) recursively
+"""
+
+import asyncio
+import logging
+import re
+from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple
+
+if TYPE_CHECKING:
+    from nox import Orchestrator
+
+# Logger named "nox.system"; used below for debug/info/warning diagnostics.
+_syslog = logging.getLogger("nox.system")
+
+# Patterns used to pull pivotable identifiers out of free text.
+# E-mail: local part, "@", host label, ".", then word chars and dots.
+_EMAIL_RE    = re.compile(r"[\w.+-]+@[\w-]+\.[\w.]+")
+# Profile URL on github.com / twitter.com / linkedin.com/in / reddit.com/u;
+# captures the 3–39 character handle after the slash (case-insensitive).
+_USERNAME_RE = re.compile(r"(?:github\.com|twitter\.com|linkedin\.com/in|reddit\.com/u)/([A-Za-z0-9_.-]{3,39})", re.I)
+# Phone: either "+" followed by digits and separators, or NNN-NNN-NNNN with
+# space, dot or hyphen separators.
+_PHONE_RE    = re.compile(r"\+\d[\d\s.\-()]{7,14}\d|\b\d{3}[\s.\-]\d{3}[\s.\-]\d{4}\b")
+# Name: two to four capitalised words (one capital + 1–20 lowercase letters each).
+_NAME_RE     = re.compile(r"\b([A-Z][a-z]{1,20}(?:\s+[A-Z][a-z]{1,20}){1,3})\b")
+
+# Maximum number of dork hits kept and harvested per scanned asset.
+_DORK_LIMIT  = 20
+# Identifier types that are allowed to be collected as new pivot candidates.
+_PIVOT_TYPES = {"email", "username", "phone", "name", "ip", "domain"}
+
+
+def _cfg_depth(orc=None) -> int:
+    # A7/A10: read from orchestrator config if available
+    if orc is not None:
+        cfg = getattr(orc, "config", None)
+        if cfg is not None:
+            v = getattr(cfg, "pivot_depth", None)
+            if v is not None:
+                return int(v)
+    try:
+        from nox import Cfg  # type: ignore
+        return Cfg.PIVOT_DEPTH
+    except ImportError:
+        return 2
+
+
+def _cfg_concurrency(orc=None) -> int:
+    # A7: read from orchestrator config if available
+    if orc is not None:
+        cfg = getattr(orc, "config", None)
+        if cfg is not None:
+            v = getattr(cfg, "concurrency", None)
+            if v is not None:
+                return int(v)
+    try:
+        from nox import Cfg  # type: ignore
+        return Cfg.CONCURRENCY
+    except ImportError:
+        return 15
+
+
+def _out(level: str, msg: str) -> None:
+    try:
+        from nox import out as _nox_out  # type: ignore
+        _nox_out(level, msg)
+    except Exception:
+        import sys
+        print(f"[{level}] {msg}", file=sys.stderr)
+
+
+def _extract_ids_from_text(text: str, exclude: str = "") -> List[Tuple[str, str]]:
+    """Extract pivotable identifiers from free text, excluding the current asset."""
+    found: List[Tuple[str, str]] = []
+    excl = exclude.lower()
+    for m in _EMAIL_RE.findall(text):
+        v = m.lower()
+        if v != excl:
+            found.append((v, "email"))
+    for m in _USERNAME_RE.findall(text):
+        v = m.lower()
+        if v != excl:
+            found.append((v, "username"))
+    for m in _PHONE_RE.findall(text):
+        clean = re.sub(r"[\s.\-()]", "", m)
+        if 8 <= len(clean) <= 15 and clean != excl:
+            found.append((clean, "phone"))
+    for m in _NAME_RE.findall(text):
+        if len(m.split()) >= 2 and m.lower() != excl:
+            found.append((m, "name"))
+    return found
+
+
+def _ids_from_records(records: list, exclude: str = "") -> List[Tuple[str, str, str]]:
+    """
+    Extract pivotable identifiers from breach records.
+    Returns (value, qtype, ref) where ref is the source/breach name for logging.
+    """
+    found: List[Tuple[str, str, str]] = []
+    excl = exclude.lower()
+    for r in records:
+        src = getattr(r, "source", "") or ""
+        breach = getattr(r, "breach_name", "") or src
+        for val, qtype in [
+            (getattr(r, "email",      ""), "email"),
+            (getattr(r, "username",   ""), "username"),
+            (getattr(r, "phone",      ""), "phone"),
+            (getattr(r, "full_name",  ""), "name"),
+            (getattr(r, "ip_address", ""), "ip"),
+            (getattr(r, "domain",     ""), "domain"),
+        ]:
+            if val and len(val) > 2 and val.lower() != excl:
+                found.append((val.strip(), qtype, breach))
+        meta = getattr(r, "metadata", {}) or {}
+        for em in meta.get("emails", []):
+            if em and em.lower() != excl:
+                found.append((em.lower(), "email", breach))
+    return found
+
+
+# ── Pivot log entry schema ─────────────────────────────────────────────────
+# {
+#   "asset":      str,         # identifier scanned
+#   "qtype":      str,         # email/username/phone/name/domain/ip
+#   "depth":      int,         # 0=seed, 1=first pivot, …
+#   "parent":     str|None,    # asset that discovered this one
+#   "found_in":   str,         # phase that found this asset: seed/breach/dork/scrape/hash_crack
+#   "records":    int,         # breach records found for this asset
+#   "dorks":      int,         # dork hits found for this asset
+#   "scrape":     int,         # scrape items found for this asset
+#   "children":   List[dict],  # [{asset, qtype, found_in, ref}] — new assets discovered
+#   "cracked":    List[str],   # plaintexts cracked from hashes in breach results
+# }
+
+
+class AvalancheScanner:
+    def __init__(self, orchestrator: "Orchestrator") -> None:
+        self._orc             = orchestrator
+        self.seen_assets: Set[str]  = set()
+        # A2: single semaphore for the entire run, created lazily inside the event loop
+        self._sem: Optional[asyncio.Semaphore] = None
+        self._all_records: List     = []
+        self._dork_hits:   List[dict] = []
+        self._seen_dork_urls: Set[str] = set()
+        # A6: scrape_hits merged atomically per _do_process call
+        self._scrape_hits: Dict     = {"pastes": [], "credentials": [], "hashes": [],
+                                       "telegram": [], "dork_misconfigs": []}
+        self._max_depth: int        = 0
+        self._in_flight: Dict[str, asyncio.Future] = {}
+        self.pivot_log: List[dict]  = []
+        # A8: global set to prevent duplicate entries in discovered_assets
+        self._seen_discovered: Set[str] = set()
+        self.discovered_assets: List[dict] = []
+
+    def _get_sem(self) -> asyncio.Semaphore:
+        # A2: semaphore created once per run, shared across all coroutines
+        if self._sem is None:
+            self._sem = asyncio.Semaphore(_cfg_concurrency(self._orc))
+        return self._sem
+
+    async def run(self, target: str) -> tuple:
+        # A9: respect no_pivot flag from config
+        cfg = getattr(self._orc, "config", None)
+        no_pivot = getattr(cfg, "no_pivot", False) if cfg else False
+        if no_pivot:
+            try:
+                from nox import Detect  # type: ignore
+                qtype = Detect.qtype(target)
+            except ImportError:
+                qtype = "email"
+            async with self._get_sem():
+                try:
+                    records = await self._orc._full_async_scan(target, qtype)
+                except Exception:
+                    records = []
+            self._all_records.extend(records)
+            self.seen_assets.add(target.lower().strip())
+            self.pivot_log.append({
+                "asset": target, "qtype": qtype, "depth": 0, "parent": None,
+                "found_in": "seed", "records": len(records), "dorks": 0,
+                "scrape": 0, "children": [], "cracked": [],
+            })
+            return self._all_records, self._dork_hits, self._scrape_hits
+        await self._process(target, depth=0, parent=None, found_in="seed")
+        return self._all_records, self._dork_hits, self._scrape_hits
+
+    def get_discovered_assets(self) -> List[dict]:
+        """Return flat list of all discovered assets with full provenance."""
+        return self.discovered_assets
+
+    def get_max_depth(self) -> int:
+        return self._max_depth
+
+    # ── Dedup gate ────────────────────────────────────────────────────
+
+    async def _process(self, asset: str, depth: int,
+                       parent: Optional[str], found_in: str) -> None:
+        """Dedup gate: ensures each asset is processed exactly once."""
+        # A10: use per-run depth from orchestrator config
+        if depth > _cfg_depth(self._orc):
+            _syslog.debug("avalanche depth cap reached for %s", asset)
+            return
+
+        key = asset.lower().strip()
+        if not key:
+            return
+
+        # A1: add to seen_assets FIRST (atomic gate) before any other check.
+        # If already present, wait on the in-flight future if one exists, then return.
+        if key in self.seen_assets:
+            if key in self._in_flight:
+                try:
+                    await self._in_flight[key]
+                except Exception:
+                    pass
+            return
+
+        self.seen_assets.add(key)
+
+        # If already in-flight (shouldn't happen after the seen_assets check above,
+        # but guard defensively), wait and return.
+        if key in self._in_flight:
+            try:
+                await self._in_flight[key]
+            except Exception:
+                pass
+            return
+
+        loop = asyncio.get_running_loop()
+        fut: asyncio.Future = loop.create_future()
+        self._in_flight[key] = fut
+
+        try:
+            await self._do_process(asset, depth, parent, found_in)
+        finally:
+            if not fut.done():
+                fut.set_result(None)
+
+    # ── Core pipeline ─────────────────────────────────────────────────
+
+    async def _do_process(self, asset: str, depth: int,
+                          parent: Optional[str], found_in: str) -> None:
+        """
+        Sequential pipeline:
+          Phase 1 — Breach scan
+          Phase 2 — Hash crack (concurrent, non-blocking)
+          Phase 3 — Dork
+          Phase 4 — Scrape
+          → Harvest all new identifiers with phase+ref annotation
+          → Reinject every unseen identifier
+        """
+        if depth > self._max_depth:
+            self._max_depth = depth
+
+        try:
+            from nox import Detect  # type: ignore
+            qtype = Detect.qtype(asset)
+        except ImportError:
+            qtype = "email"
+
+        indent = "  " * depth
+        _out("pivot" if depth > 0 else "info",
+             f"{indent}[depth={depth}] {'↳' if depth > 0 else '◉'} {asset} ({qtype})"
+             + (f"  ← {found_in} via {parent}" if parent else "  [SEED]"))
+        _syslog.info("AVALANCHE asset=%s depth=%d parent=%s found_in=%s",
+                     asset, depth, parent or "—", found_in)
+
+        # ── Phase 1: Breach scan ──────────────────────────────────────
+        async with self._get_sem():
+            try:
+                records: List = await self._orc._full_async_scan(asset, qtype)
+            except Exception as exc:
+                _syslog.warning("BREACH_FAIL asset=%s err=%s", asset, exc)
+                records = []
+
+        _out("ok" if records else "dim",
+             f"{indent}  [breach] {len(records)} records")
+        _syslog.info("BREACH_DONE asset=%s records=%d", asset, len(records))
+        self._all_records.extend(records)
+
+        # ── Phase 2: Hash crack (non-blocking) ────────────────────────
+        cracked_plaintexts: List[str] = []
+        try:
+            from sources.helpers.cracker import detect_hash  # type: ignore
+            import aiohttp as _aio  # type: ignore
+            async with _aio.ClientSession(connector=_aio.TCPConnector(limit=5)) as _cs:
+                crack_tasks = [
+                    _crack_and_inject(_cs, getattr(r, "password_hash", ""), r,
+                                      self.seen_assets, self._all_records,
+                                      self, depth, asset, cracked_plaintexts)
+                    for r in records
+                    if getattr(r, "password_hash", "") and not getattr(r, "password", "")
+                    and detect_hash(getattr(r, "password_hash", ""))
+                ]
+                if crack_tasks:
+                    await asyncio.gather(*crack_tasks, return_exceptions=True)
+        except ImportError:
+            pass
+
+        # ── Phase 3: Dork ─────────────────────────────────────────────
+        _out("info", f"{indent}  [dork] querying for {asset}…")
+        try:
+            dork_res = await self._async_dork(asset, qtype)
+        except Exception as exc:
+            _syslog.warning("DORK_FAIL asset=%s err=%s", asset, exc)
+            dork_res = []
+
+        dork_count = 0
+        for hit in (dork_res or [])[:_DORK_LIMIT]:
+            url = hit.get("url", "") or hit.get("title", "")
+            if url and url not in self._seen_dork_urls:
+                self._seen_dork_urls.add(url)
+                hit["pivot_asset"] = asset
+                hit["pivot_depth"] = depth
+                self._dork_hits.append(hit)
+                dork_count += 1
+        _out("ok" if dork_count else "dim",
+             f"{indent}  [dork] {dork_count} hits")
+        _syslog.info("DORK_DONE asset=%s hits=%d", asset, dork_count)
+
+        # ── Phase 4: Scrape ───────────────────────────────────────────
+        _out("info", f"{indent}  [scrape] querying for {asset}…")
+        try:
+            scrape_res = await self._async_scrape(asset)
+        except Exception as exc:
+            _syslog.warning("SCRAPE_FAIL asset=%s err=%s", asset, exc)
+            scrape_res = {}
+
+        # A6: collect scrape results locally, then merge atomically
+        scrape_count = 0
+        local_scrape: Dict = {k: [] for k in self._scrape_hits}
+        for k in self._scrape_hits:
+            for item in (scrape_res or {}).get(k, []):
+                if isinstance(item, dict):
+                    item["pivot_asset"] = asset
+                    item["pivot_depth"] = depth
+                local_scrape[k].append(item)
+                scrape_count += 1
+        # Atomic merge into shared dict (single-threaded event loop — safe)
+        for k, items in local_scrape.items():
+            self._scrape_hits[k].extend(items)
+        _out("ok" if scrape_count else "dim",
+             f"{indent}  [scrape] {scrape_count} items")
+        _syslog.info("SCRAPE_DONE asset=%s items=%d", asset, scrape_count)
+
+        # ── Harvest new identifiers with phase+ref annotation ─────────
+        # Each entry: (value, qtype, found_in_phase, ref)
+        new_ids: List[Tuple[str, str, str, str]] = []
+
+        # From breach records
+        for val, vqtype, ref in _ids_from_records(records, exclude=asset):
+            if vqtype in _PIVOT_TYPES:
+                new_ids.append((val, vqtype, "breach", ref))
+
+        # From dork hits
+        for hit in (dork_res or [])[:_DORK_LIMIT]:
+            url   = hit.get("url", "")
+            dork  = hit.get("dork", "")
+            ref   = url or dork
+            text  = f"{hit.get('title','')} {hit.get('snippet','')} {url} {dork}"
+            for val, vqtype in _extract_ids_from_text(text, exclude=asset):
+                if vqtype in _PIVOT_TYPES:
+                    new_ids.append((val, vqtype, "dork", ref[:120]))
+
+        # From scrape results
+        for cred in (scrape_res or {}).get("credentials", []):
+            raw = cred.get("raw", "")
+            ref = f"paste:{cred.get('paste_id','')}" or cred.get("source", "scrape")
+            for val, vqtype in _extract_ids_from_text(raw, exclude=asset):
+                if vqtype in _PIVOT_TYPES:
+                    new_ids.append((val, vqtype, "scrape", ref))
+        for tg in (scrape_res or {}).get("telegram", []):
+            ref = f"t.me/{tg.get('channel','')}"
+            for val, vqtype in _extract_ids_from_text(tg.get("text", ""), exclude=asset):
+                if vqtype in _PIVOT_TYPES:
+                    new_ids.append((val, vqtype, "scrape", ref))
+        for mc in (scrape_res or {}).get("dork_misconfigs", []):
+            ref = mc.get("url", mc.get("title", "misconfig"))
+            for val, vqtype in _extract_ids_from_text(
+                    f"{mc.get('title','')} {mc.get('snippet','')}", exclude=asset):
+                if vqtype in _PIVOT_TYPES:
+                    new_ids.append((val, vqtype, "scrape", ref[:120]))
+
+        # ── Deduplicate and queue children ────────────────────────────
+        children: List[dict] = []
+        child_tasks = []
+        queued: Set[str] = set()
+
+        for val, vqtype, phase, ref in new_ids:
+            child_key = val.lower().strip()
+            if not child_key or child_key in self.seen_assets or child_key in queued:
+                continue
+            queued.add(child_key)
+            child_entry = {"asset": val, "qtype": vqtype, "found_in": phase, "ref": ref}
+            children.append(child_entry)
+            # A8: prevent duplicate entries in discovered_assets across parallel parents
+            if child_key not in self._seen_discovered:
+                self._seen_discovered.add(child_key)
+                self.discovered_assets.append({
+                    "asset":    val,
+                    "qtype":    vqtype,
+                    "phase":    phase,
+                    "ref":      ref,
+                    "parent":   asset,
+                    "depth":    depth + 1,
+                })
+            _out("pivot",
+                 f"{indent}  ↳ new asset [{phase}]: {val} ({vqtype})  ref: {ref[:60]}")
+            _syslog.info("PIVOT_QUEUE asset=%s qtype=%s phase=%s ref=%s parent=%s depth=%d",
+                         val, vqtype, phase, ref[:80], asset, depth + 1)
+            child_tasks.append(
+                self._process(val, depth + 1, parent=asset, found_in=phase)
+            )
+
+        # A5: run child tasks FIRST, then append pivot_log so the log reflects actual outcomes
+        if child_tasks:
+            _out("info", f"{indent}  → reinjecting {len(child_tasks)} new asset(s)…")
+            await asyncio.gather(*child_tasks, return_exceptions=True)
+
+        # ── Log this node (after children complete — A5) ──────────────
+        self.pivot_log.append({
+            "asset":    asset,
+            "qtype":    qtype,
+            "depth":    depth,
+            "parent":   parent,
+            "found_in": found_in,
+            "records":  len(records),
+            "dorks":    dork_count,
+            "scrape":   scrape_count,
+            "children": children,
+            "cracked":  cracked_plaintexts or [],
+        })
+
+    # ── Dork dispatcher ───────────────────────────────────────────────
+
+    async def _async_dork(self, asset: str, qtype: str = "email") -> list:
+        """Run the dork search for *asset* and return a list of hit dicts.
+
+        Preferred path: open an aiohttp session and await
+        ``self._orc.dorking_engine.async_search``; every returned record is
+        flattened to a dict with the keys ``url``, ``title``, ``snippet``,
+        ``dork`` and ``engine``.  Records without a ``raw_data`` attribute
+        produce empty strings for the url/title/dork fields.
+
+        If an ImportError surfaces (e.g. aiohttp is not installed), the
+        blocking ``self._orc.dork`` is run in the default executor instead and
+        its result is returned only when it is a list.  Any other exception
+        on the preferred path is logged at debug level and yields ``[]``.
+        """
+        try:
+            import aiohttp as _aio  # type: ignore
+            import ssl as _ssl
+
+            tls_ctx = _ssl.create_default_context()
+            conn = _aio.TCPConnector(limit=10, ssl=tls_ctx, family=0)
+            async with _aio.ClientSession(connector=conn) as http:
+                found = await self._orc.dorking_engine.async_search(http, asset, qtype)
+
+            hits = []
+            for rec in found:
+                if hasattr(rec, "raw_data"):
+                    link = rec.raw_data.get("url", "")
+                    # Title mirrors the URL, falling back to the dork string.
+                    label = rec.raw_data.get("url", rec.raw_data.get("dork", ""))
+                    query = rec.raw_data.get("dork", "")
+                else:
+                    link = label = query = ""
+                hits.append({
+                    "url":     link,
+                    "title":   label,
+                    "snippet": "",
+                    "dork":    query,
+                    "engine":  "DDG",
+                })
+            return hits
+        except ImportError:
+            # Async stack unavailable: use the orchestrator's blocking dork
+            # in a worker thread so the event loop is not stalled.
+            fallback = await asyncio.get_running_loop().run_in_executor(
+                None, self._orc.dork, asset)
+            if isinstance(fallback, list):
+                return fallback
+            return []
+        except Exception as exc:
+            _syslog.debug("DORK_ERR asset=%s err=%s", asset, exc)
+            return []
+
+    # ── Scrape dispatcher ─────────────────────────────────────────────
+
+    async def _async_scrape(self, asset: str) -> dict:
+        """Run the scrape phase for *asset* in the default executor.
+
+        Preferred path: build a fresh ``Session`` and ``ScrapeEngine`` for
+        this call and run ``engine.run(asset, qtype)`` in a worker thread,
+        where *qtype* comes from ``Detect.qtype`` and defaults to ``"email"``
+        when detection is unavailable or fails.  If anything on that path
+        raises, the reason is logged at debug level and the orchestrator's
+        own ``scrape`` is run in the executor instead.
+
+        Returns:
+            The scrape result when it is a dict; otherwise (or when the
+            fallback also fails) a dict with empty ``pastes``,
+            ``credentials``, ``hashes``, ``telegram`` and ``dork_misconfigs``
+            lists.  This method does not raise for ordinary exceptions.
+        """
+        # A3: instantiate a fresh Session + ScrapeEngine per call to avoid sharing
+        # a non-thread-safe requests.Session / cloudscraper across concurrent coroutines.
+        _empty: dict = {"pastes": [], "credentials": [], "hashes": [],
+                        "telegram": [], "dork_misconfigs": []}
+        try:
+            loop = asyncio.get_running_loop()
+            try:
+                from nox import Session, NoxConfig, ScrapeEngine  # type: ignore
+                _cfg = getattr(self._orc, "config", None) or NoxConfig()
+                _session = Session(_cfg)
+                _engine = ScrapeEngine(_session, self._orc.db)
+                qtype = "email"
+                try:
+                    from nox import Detect  # type: ignore
+                    qtype = Detect.qtype(asset)
+                except Exception as detect_exc:
+                    # Best-effort detection: keep the "email" default, but
+                    # leave a trace instead of swallowing the reason.
+                    _syslog.debug("SCRAPE_QTYPE_DEFAULT asset=%s err=%s",
+                                  asset, detect_exc)
+                result = await loop.run_in_executor(None, _engine.run, asset, qtype)
+            except Exception as engine_exc:
+                # The fresh-engine path failed (import, construction or run).
+                # Record why before falling back to the shared orchestrator,
+                # otherwise real engine errors are invisible.
+                _syslog.debug("SCRAPE_FALLBACK asset=%s err=%s", asset, engine_exc)
+                result = await loop.run_in_executor(None, self._orc.scrape, asset)
+            return result if isinstance(result, dict) else _empty
+        except Exception as exc:
+            _syslog.debug("SCRAPE_ERR asset=%s err=%s", asset, exc)
+            return _empty
+
+
+# ── Hash crack helper ──────────────────────────────────────────────────────
+
+async def _crack_and_inject(session, hash_value: str, record_ref,
+                             seen_assets: Set[str], all_records: list,
+                             scanner: "AvalancheScanner",
+                             depth: int, parent_asset: str,
+                             cracked_out: List[str]) -> None:
+    """Try to crack *hash_value*; on success annotate the record and pivot.
+
+    Args:
+        session: Passed through to ``async_crack``.
+        hash_value: The hash to identify and crack.
+        record_ref: Record object updated in place on success: ``password``
+            and ``hash_type`` are set and ``"Cracked"`` is added to
+            ``data_types`` (a missing/``None`` ``data_types`` is treated as
+            empty).
+        seen_assets: Keys of assets already handled; used to skip the pivot.
+        all_records: Not used by this function; kept for interface
+            compatibility.
+        scanner: Supplies ``_orc`` (for the depth limit) and ``_process``.
+        depth: Depth of the parent asset; the pivot runs at ``depth + 1``.
+        parent_asset: Asset whose record contained the hash.
+        cracked_out: Output list; the recovered plaintext is appended.
+
+    Returns:
+        None.  Unknown hash types, crack failures/timeouts and empty results
+        are logged at debug level and end the call without side effects.
+    """
+    from sources.helpers.cracker import detect_hash, async_crack, CRACK_TIMEOUT  # type: ignore
+    hash_type = detect_hash(hash_value)
+    if not hash_type:
+        return
+    try:
+        plaintext = await asyncio.wait_for(
+            async_crack(session, hash_value, hash_type), timeout=CRACK_TIMEOUT)
+    except Exception as exc:
+        # asyncio.TimeoutError is an Exception subclass, so one clause covers
+        # both the timeout and any cracker failure.
+        _syslog.debug("CRACK_FAIL hash=%s reason=%s", hash_value[:16], exc)
+        return
+
+    if not plaintext:
+        _syslog.debug("CRACK_FAIL hash=%s reason=no_result", hash_value[:16])
+        return
+
+    record_ref.password  = plaintext
+    record_ref.hash_type = hash_type
+    # Normalise once: data_types may be None, and list(None) would raise.
+    existing_types = list(record_ref.data_types or [])
+    if "Cracked" not in existing_types:
+        record_ref.data_types = existing_types + ["Cracked"]
+    # NOTE(review): the recovered plaintext is written in clear to the syslog
+    # and the console below — confirm this is intended for the log sink used.
+    _syslog.info("CRACK_OK hash=%s plain=%s parent=%s", hash_value[:16], plaintext, parent_asset)
+    _out("ok", f"  [crack] {hash_value[:16]}… → {plaintext}  (from {parent_asset})")
+    cracked_out.append(plaintext)
+
+    # A4: inject cracked plaintext as qtype="password" — NOT as username.
+    # Only pivot on it if sources support password-recycling queries.
+    # Key is normalised like the child-queue keys (lower + strip) so the
+    # seen_assets lookup compares like with like.
+    key = plaintext.lower().strip()
+    if key not in seen_assets and depth + 1 <= _cfg_depth(scanner._orc):
+        await scanner._process(plaintext, depth + 1,
+                               parent=parent_asset, found_in="hash_crack")