release: v1.0.2

- 124 sources (+1 xposedornot, bgpview replaced with ripestat)
- Fix gravatar MD5 transform, fofa base64 query encoding
- Fix misp_search URL resolution, threatconnect HMAC placeholder
- Fix spycloud, duckduckgo, mailboxlayer/numverify/ipstack/ipinfodb endpoints
- Fix DeHashEngine v1→v2, DorkEngine engine label, backup_endpoints consumed
- Fix Retry-After HTTP-date parsing, Hashmob API schema, FIPS hashlib crash
- Fix DB.close() event loop leak, _random_headers CH-UA override
- Add query_transform mechanism (md5_lower, fofa_domain)
- Lower scores: spyonweb, pipl_search, twitter_v2, hudsonrock rate_limit
- Clean all internal tracking comments, fix Italian docstring
2026-06-08 16:07:17 +00:00 · 2026-04-14 21:18:30 +02:00
parent cf4428329e
commit 9bf66d3e50
26 changed files with 345 additions and 205 deletions
@@ -29,13 +29,13 @@ SERVICE_REGISTRY: Dict[str, Dict] = {
    "alienvault_otx_malware":  {"display": "AlienVault OTX (Malware)",   "public": True},
    "alienvault_otx_user":     {"display": "AlienVault OTX (User)",      "public": True},
    "anubis_subdomains":       {"display": "Anubis Subdomains",          "public": True},
-    "bgpview_ip":              {"display": "BGPView IP",                 "public": True},
-    "checkleaked":             {"display": "CheckLeaked",                "public": True},
+    "ripestat_ip":             {"display": "RIPE Stat IP",               "public": True},
+    "xposedornot":             {"display": "XposedOrNot",                  "public": True},
    "crt_sh":                  {"display": "crt.sh",                     "public": True},
    "cve_search":              {"display": "CVE Search",                 "public": True},
    "cxsecurity":              {"display": "CXSecurity",                 "public": True},
    "duckduckgo_api":          {"display": "Google / DDG Dorks",         "public": True},
-    "emailrep_io":             {"display": "EmailRep.io",                "public": True},
+    "emailrep_io":             {"display": "EmailRep.io",                "public": False},
    "github_users":            {"display": "GitHub Users",               "public": True},
    "gitlab_search":           {"display": "GitLab Search",              "public": True},
    "gravatar":                {"display": "Gravatar",                   "public": True},
@@ -44,7 +44,10 @@ SERVICE_REGISTRY: Dict[str, Dict] = {
    "hackertarget_hostsearch": {"display": "HackerTarget Host Search",   "public": True},
    "hackertarget_reverseip":  {"display": "HackerTarget Reverse IP",    "public": True},
    "hackertarget_whois":      {"display": "WHOIS (HackerTarget)",       "public": True},
-    "hudsonrock_osint":        {"display": "HudsonRock OSINT",           "public": True},
+    "ipapi_is":                {"display": "ipapi.is",                      "public": True},
+    "circl_hashlookup":        {"display": "CIRCL Hash Lookup",             "public": True},
+    "proxynova_comb":          {"display": "ProxyNova COMB",                "public": True},
+    "shodan_internetdb":       {"display": "Shodan InternetDB",             "public": True},
    "ipapi_co":                {"display": "ipapi.co",                   "public": True},
    "ipinfo_io":               {"display": "IPInfo.io",                  "public": True},
    "ipvigilante":             {"display": "IPVigilante",                "public": True},
@@ -59,14 +62,10 @@ SERVICE_REGISTRY: Dict[str, Dict] = {
    "reddit_user":             {"display": "Reddit User",                "public": True},
    "robtex_ip":               {"display": "Robtex IP",                  "public": True},
    "scamwatcher":             {"display": "ScamWatcher",                "public": True},
-    "social_scan":             {"display": "Social Scan",                "public": True},
    "sublist3r_api":           {"display": "Sublist3r API",              "public": True},
-    "threatcrowd_domain":      {"display": "ThreatCrowd (Domain)",       "public": True},
-    "threatcrowd_email":       {"display": "ThreatCrowd (Email)",        "public": True},
    "threatminer_domain":      {"display": "ThreatMiner (Domain)",       "public": True},
    "threatminer_ip":          {"display": "ThreatMiner (IP)",           "public": True},
    "urlscan_search":          {"display": "URLScan.io",                 "public": True},
-    "vigilante_pw":            {"display": "Vigilante.pw",               "public": True},
    "wayback_machine":         {"display": "Wayback Machine",            "public": True},
    # ── Private / key-required ────────────────────────────────────────
    "ABSTRACT_API_KEY":         {"display": "Abstract Email Validation", "public": False},
@@ -78,7 +77,6 @@ SERVICE_REGISTRY: Dict[str, Dict] = {
    "BING_API_KEY":             {"display": "Bing Search API",           "public": False},
    "CENSYS_AUTH_BASE64":       {"display": "Censys",                    "public": False},
    "CIRCL_AUTH_BASE64":        {"display": "CIRCL.lu PDNS",             "public": False},
-    "CIT0DAY_API_KEY":          {"display": "Cit0day",                   "public": False},
    "SEON_API_KEY":             {"display": "SEON Email Intelligence",   "public": False},
    "CRIMINALIP_API_KEY":       {"display": "CriminalIP",                "public": False},
    "DEHASHED_AUTH_BASE64":     {"display": "Dehashed",                  "public": False},
@@ -108,7 +106,6 @@ SERVICE_REGISTRY: Dict[str, Dict] = {
    "JOE_API_KEY":              {"display": "Joe Sandbox",               "public": False},
    "LEAKCHECK_API_KEY":        {"display": "LeakCheck",                 "public": False},
    "LEAKIX_API_KEY":           {"display": "LeakIX",                    "public": False},
-    "LEAKSTATS_API_KEY":        {"display": "LeakStats.pw",              "public": False},
    "MAILBOX_API_KEY":          {"display": "Mailboxlayer",              "public": False},
    "MALSHARE_API_KEY":         {"display": "MalShare",                  "public": False},
    "METADEFENDER_API_KEY":     {"display": "MetaDefender",              "public": False},
@@ -124,7 +121,6 @@ SERVICE_REGISTRY: Dict[str, Dict] = {
    "SNUSBASE_API_KEY":         {"display": "Snusbase",                  "public": False},
    "SPYCLOUD_API_KEY":         {"display": "SpyCloud",                  "public": False},
    "SPYONWEB_API_KEY":         {"display": "SpyOnWeb",                  "public": False},
-    "SPYSE_API_KEY":            {"display": "Spyse",                     "public": False},
    "TC_API_KEY":               {"display": "ThreatConnect",             "public": False},
    "FLARE_API_KEY":            {"display": "Flare LeaksDB",              "public": False},
    "TP_API_KEY":               {"display": "ThreatPortal",              "public": False},
@@ -138,7 +134,6 @@ SERVICE_REGISTRY: Dict[str, Dict] = {
    "WHOXY_API_KEY":            {"display": "Whoxy WHOIS",               "public": False},
    "ZEROBOUNCE_API_KEY":       {"display": "ZeroBounce",                "public": False},
    "ZOOMEYE_API_KEY":          {"display": "ZoomEye",                   "public": False},
-    # ── Added in v1.0.1 ───────────────────────────────────────────────
    "EMAILREP_API_KEY":         {"display": "EmailRep.io",               "public": False},
    "HASHES_COM_API_KEY":       {"display": "Hashes.com (crack API)",    "public": False},
    "THREATFOX_API_KEY":        {"display": "ThreatFox (abuse.ch)",      "public": False},
@@ -146,8 +141,8 @@ SERVICE_REGISTRY: Dict[str, Dict] = {
    "MALWAREBAZAAR_API_KEY":    {"display": "MalwareBazaar (abuse.ch)",  "public": False},
    "FULLHUNT_API_KEY":         {"display": "FullHunt (attack surface)", "public": False},
    "NETLAS_API_KEY":           {"display": "Netlas.io (internet scanner)", "public": False},
-    # ── Added in v1.0.2 ───────────────────────────────────────────────
    "LEAK_LOOKUP_API_KEY":      {"display": "Leak-Lookup",               "public": False},
+    "MISP_URL":                 {"display": "MISP Instance URL",         "public": False},
 }

 _PRIVATE_KEYS = {k: v for k, v in SERVICE_REGISTRY.items() if not v["public"]}
@@ -12,9 +12,9 @@ import logging
 import re
 from typing import List, Optional, Tuple

-# C2: MD5 and NTLM share the same 32-char hex pattern.
-# We list md5 first (most common in breach data) but also accept ntlm
-# so callers can query NTLM-specific APIs when needed.
+# MD5 and NTLM share the same 32-char hex pattern. The md5 entry is listed
+# ahead of ntlm because MD5 is the most common type in breach data.
+# async_crack queries both md5- and ntlm-specific APIs for any 32-char hash.
 _PATTERNS: List[Tuple[str, re.Pattern]] = [
    ("bcrypt",  re.compile(r"^\$2[aby]?\$\d{2}\$.{53}$")),
    ("sha256",  re.compile(r"^[a-f0-9]{64}$", re.I)),
@@ -130,9 +130,23 @@ def _local_crack_sync_blocking(hash_value: str, hash_type: str) -> Optional[str]
    if not wordlist.exists():
        return None
    h = hash_value.strip().lower()
+    # usedforsecurity=False is required on FIPS-enabled systems (Python 3.9+).
+    # On Python 3.8 the kwarg does not exist, so we fall back gracefully.
+    def _md5(w):
+        try:
+            return _hl.md5(w, usedforsecurity=False).hexdigest()
+        except TypeError:
+            return _hl.md5(w).hexdigest()
+
+    def _sha1(w):
+        try:
+            return _hl.sha1(w, usedforsecurity=False).hexdigest()
+        except TypeError:
+            return _hl.sha1(w).hexdigest()
+
    _hashers = {
-        "md5":    lambda w: _hl.md5(w).hexdigest(),
-        "sha1":   lambda w: _hl.sha1(w).hexdigest(),
+        "md5":    _md5,
+        "sha1":   _sha1,
        "sha256": lambda w: _hl.sha256(w).hexdigest(),
    }
    hasher = _hashers.get(hash_type)
@@ -48,11 +48,11 @@ def _raw(v: Any, maxlen: int = 200) -> str:


 def _pdf_safe(s: str, maxlen: int = 180) -> str:
-    # D4: sanitize for fpdf2 core fonts (latin-1 subset).
+    # Sanitise for fpdf2 core fonts (latin-1 subset).
    # NFKD normalization decomposes accented chars (é→e + combining accent)
    # so common accented Latin characters survive as their base letter.
-    # Truly non-latin-1 chars (Cyrillic, CJK, etc.) become '?' — intentional:
-    # fpdf2 core fonts cannot render them and would raise UnicodeEncodeError.
+    # Truly non-latin-1 chars (Cyrillic, CJK, etc.) become '?' — fpdf2 core
+    # fonts cannot render them and would raise UnicodeEncodeError.
    s = _raw(s, maxlen)
    try:
        import unicodedata
@@ -114,7 +114,7 @@ def render_pivot_chain(data: dict) -> List[str]:
    chain  = data.get("pivot_chain") or []
    target = _raw(data.get("target", "?"))

-    # D2: if pivot_log is available, build chain from it (accurate tree)
+    # Build chain from pivot_log when available — it carries the full tree with depth and provenance.
    pivot_log = data.get("pivot_log") or []
    if pivot_log:
        lines: List[str] = []
@@ -195,14 +195,12 @@ def to_json(data: dict, path: str) -> None:
    dork_results   = data.get("dork_results", []) or []
    scrape_results = data.get("scrape_results", {}) or {}

-    # D3: apply consistent cap (1000) — same as HTML
    _RECORD_CAP = 1000

    out_data = {
        "framework":       f"NOX v{_NOX_VERSION}",
        "generated":       datetime.now().isoformat(),
        "target":          data.get("target", ""),
-        # J3: self-describing metadata block
        "_meta": {
            "scan_id":        hashlib.sha256(
                f"{data.get('target','')}{datetime.now().isoformat()}".encode()
@@ -387,7 +385,6 @@ def to_html(data: dict, path: str) -> None:
 # ── PDF report (fpdf2) ────────────────────────────────────────────────

 def to_pdf(data: dict, path: str, investigator_id: str = "NOX-AUTO") -> None:
-    # D1: raise a clear error with install hint if fpdf2 is absent — never silently return.
    try:
        from fpdf import FPDF  # type: ignore
    except ImportError:
@@ -31,7 +31,6 @@ _PIVOT_TYPES = {"email", "username", "phone", "name", "ip", "domain"}


 def _cfg_depth(orc=None) -> int:
-    # A7/A10: read from orchestrator config if available
    if orc is not None:
        cfg = getattr(orc, "config", None)
        if cfg is not None:
@@ -46,7 +45,6 @@ def _cfg_depth(orc=None) -> int:


 def _cfg_concurrency(orc=None) -> int:
-    # A7: read from orchestrator config if available
    if orc is not None:
        cfg = getattr(orc, "config", None)
        if cfg is not None:
@@ -137,29 +135,24 @@ class AvalancheScanner:
    def __init__(self, orchestrator: "Orchestrator") -> None:
        self._orc             = orchestrator
        self.seen_assets: Set[str]  = set()
-        # A2: single semaphore for the entire run, created lazily inside the event loop
        self._sem: Optional[asyncio.Semaphore] = None
        self._all_records: List     = []
        self._dork_hits:   List[dict] = []
        self._seen_dork_urls: Set[str] = set()
-        # A6: scrape_hits merged atomically per _do_process call
        self._scrape_hits: Dict     = {"pastes": [], "credentials": [], "hashes": [],
                                       "telegram": [], "dork_misconfigs": []}
        self._max_depth: int        = 0
        self._in_flight: Dict[str, asyncio.Future] = {}
        self.pivot_log: List[dict]  = []
-        # A8: global set to prevent duplicate entries in discovered_assets
        self._seen_discovered: Set[str] = set()
        self.discovered_assets: List[dict] = []

    def _get_sem(self) -> asyncio.Semaphore:
-        # A2: semaphore created once per run, shared across all coroutines
        if self._sem is None:
            self._sem = asyncio.Semaphore(_cfg_concurrency(self._orc))
        return self._sem

    async def run(self, target: str) -> tuple:
-        # A9: respect no_pivot flag from config
        cfg = getattr(self._orc, "config", None)
        no_pivot = getattr(cfg, "no_pivot", False) if cfg else False
        if no_pivot:
@@ -196,7 +189,6 @@ class AvalancheScanner:
    async def _process(self, asset: str, depth: int,
                       parent: Optional[str], found_in: str) -> None:
        """Dedup gate: ensures each asset is processed exactly once."""
-        # A10: use per-run depth from orchestrator config
        if depth > _cfg_depth(self._orc):
            _syslog.debug("avalanche depth cap reached for %s", asset)
            return
@@ -205,7 +197,7 @@ class AvalancheScanner:
        if not key:
            return

-        # A1: add to seen_assets FIRST (atomic gate) before any other check.
+        # Add to seen_assets before any await to prevent concurrent duplicates.
        # If already present, wait on the in-flight future if one exists, then return.
        if key in self.seen_assets:
            if key in self._in_flight:
@@ -326,7 +318,8 @@ class AvalancheScanner:
            _syslog.warning("SCRAPE_FAIL asset=%s err=%s", asset, exc)
            scrape_res = {}

-        # A6: collect scrape results locally, then merge atomically
+        # Collect scrape results locally then merge into the shared dict.
+        # The event loop is single-threaded so the merge is safe without a lock.
        scrape_count = 0
        local_scrape: Dict = {k: [] for k in self._scrape_hits}
        for k in self._scrape_hits:
@@ -336,7 +329,7 @@ class AvalancheScanner:
                    item["pivot_depth"] = depth
                local_scrape[k].append(item)
                scrape_count += 1
-        # Atomic merge into shared dict (single-threaded event loop — safe)
+        # Merge into shared dict — safe within the single-threaded event loop.
        for k, items in local_scrape.items():
            self._scrape_hits[k].extend(items)
        _out("ok" if scrape_count else "dim",
@@ -393,7 +386,6 @@ class AvalancheScanner:
            queued.add(child_key)
            child_entry = {"asset": val, "qtype": vqtype, "found_in": phase, "ref": ref}
            children.append(child_entry)
-            # A8: prevent duplicate entries in discovered_assets across parallel parents
            if child_key not in self._seen_discovered:
                self._seen_discovered.add(child_key)
                self.discovered_assets.append({
@@ -412,12 +404,12 @@ class AvalancheScanner:
                self._process(val, depth + 1, parent=asset, found_in=phase)
            )

-        # A5: run child tasks FIRST, then append pivot_log so the log reflects actual outcomes
+        # Run child tasks before appending to pivot_log so the log reflects actual outcomes.
        if child_tasks:
            _out("info", f"{indent}  → reinjecting {len(child_tasks)} new asset(s)…")
            await asyncio.gather(*child_tasks, return_exceptions=True)

-        # ── Log this node (after children complete — A5) ──────────────
+        # ── Log this node ─────────────────────────────────────────────
        self.pivot_log.append({
            "asset":    asset,
            "qtype":    qtype,
@@ -461,8 +453,8 @@ class AvalancheScanner:
    # ── Scrape dispatcher ─────────────────────────────────────────────

    async def _async_scrape(self, asset: str) -> dict:
-        # A3: instantiate a fresh Session + ScrapeEngine per call to avoid sharing
-        # a non-thread-safe requests.Session / cloudscraper across concurrent coroutines.
+        # Instantiate a fresh Session and ScrapeEngine per call — requests.Session
+        # and cloudscraper are not safe to share across concurrent coroutines.
        _empty: dict = {"pastes": [], "credentials": [], "hashes": [],
                        "telegram": [], "dork_misconfigs": []}
        try:
@@ -517,8 +509,7 @@ async def _crack_and_inject(session, hash_value: str, record_ref,
    _out("ok", f"  [crack] {hash_value[:16]}… → {plaintext}  (from {parent_asset})")
    cracked_out.append(plaintext)

-    # A4: inject cracked plaintext as qtype="password" — NOT as username.
-    # Only pivot on it if sources support password-recycling queries.
+    # Inject the cracked plaintext as a password-recycling pivot seed.
    key = plaintext.lower()
    if key not in seen_assets and depth + 1 <= _cfg_depth(scanner._orc):
        await scanner._process(plaintext, depth + 1,