release: v1.0.2

- 124 sources (+1 xposedornot, bgpview replaced with ripestat)
- Fix gravatar MD5 transform, fofa base64 query encoding
- Fix misp_search URL resolution, threatconnect HMAC placeholder
- Fix spycloud, duckduckgo, mailboxlayer/numverify/ipstack/ipinfodb endpoints
- Fix DeHashEngine v1→v2, DorkEngine engine label, backup_endpoints consumed
- Fix Retry-After HTTP-date parsing, Hashmob API schema, FIPS hashlib crash
- Fix DB.close() event loop leak, _random_headers CH-UA override
- Add query_transform mechanism (md5_lower, fofa_domain)
- Lower scores: spyonweb, pipl_search, twitter_v2, hudsonrock rate_limit
- Clean all internal tracking comments, fix Italian docstring
This commit is contained in:
nox-project
2026-04-14 21:18:30 +02:00
parent cf4428329e
commit 9bf66d3e50
26 changed files with 345 additions and 205 deletions
+8 -13
View File
@@ -29,13 +29,13 @@ SERVICE_REGISTRY: Dict[str, Dict] = {
"alienvault_otx_malware": {"display": "AlienVault OTX (Malware)", "public": True},
"alienvault_otx_user": {"display": "AlienVault OTX (User)", "public": True},
"anubis_subdomains": {"display": "Anubis Subdomains", "public": True},
"bgpview_ip": {"display": "BGPView IP", "public": True},
"checkleaked": {"display": "CheckLeaked", "public": True},
"ripestat_ip": {"display": "RIPE Stat IP", "public": True},
"xposedornot": {"display": "XposedOrNot", "public": True},
"crt_sh": {"display": "crt.sh", "public": True},
"cve_search": {"display": "CVE Search", "public": True},
"cxsecurity": {"display": "CXSecurity", "public": True},
"duckduckgo_api": {"display": "Google / DDG Dorks", "public": True},
"emailrep_io": {"display": "EmailRep.io", "public": True},
"emailrep_io": {"display": "EmailRep.io", "public": False},
"github_users": {"display": "GitHub Users", "public": True},
"gitlab_search": {"display": "GitLab Search", "public": True},
"gravatar": {"display": "Gravatar", "public": True},
@@ -44,7 +44,10 @@ SERVICE_REGISTRY: Dict[str, Dict] = {
"hackertarget_hostsearch": {"display": "HackerTarget Host Search", "public": True},
"hackertarget_reverseip": {"display": "HackerTarget Reverse IP", "public": True},
"hackertarget_whois": {"display": "WHOIS (HackerTarget)", "public": True},
"hudsonrock_osint": {"display": "HudsonRock OSINT", "public": True},
"ipapi_is": {"display": "ipapi.is", "public": True},
"circl_hashlookup": {"display": "CIRCL Hash Lookup", "public": True},
"proxynova_comb": {"display": "ProxyNova COMB", "public": True},
"shodan_internetdb": {"display": "Shodan InternetDB", "public": True},
"ipapi_co": {"display": "ipapi.co", "public": True},
"ipinfo_io": {"display": "IPInfo.io", "public": True},
"ipvigilante": {"display": "IPVigilante", "public": True},
@@ -59,14 +62,10 @@ SERVICE_REGISTRY: Dict[str, Dict] = {
"reddit_user": {"display": "Reddit User", "public": True},
"robtex_ip": {"display": "Robtex IP", "public": True},
"scamwatcher": {"display": "ScamWatcher", "public": True},
"social_scan": {"display": "Social Scan", "public": True},
"sublist3r_api": {"display": "Sublist3r API", "public": True},
"threatcrowd_domain": {"display": "ThreatCrowd (Domain)", "public": True},
"threatcrowd_email": {"display": "ThreatCrowd (Email)", "public": True},
"threatminer_domain": {"display": "ThreatMiner (Domain)", "public": True},
"threatminer_ip": {"display": "ThreatMiner (IP)", "public": True},
"urlscan_search": {"display": "URLScan.io", "public": True},
"vigilante_pw": {"display": "Vigilante.pw", "public": True},
"wayback_machine": {"display": "Wayback Machine", "public": True},
# ── Private / key-required ────────────────────────────────────────
"ABSTRACT_API_KEY": {"display": "Abstract Email Validation", "public": False},
@@ -78,7 +77,6 @@ SERVICE_REGISTRY: Dict[str, Dict] = {
"BING_API_KEY": {"display": "Bing Search API", "public": False},
"CENSYS_AUTH_BASE64": {"display": "Censys", "public": False},
"CIRCL_AUTH_BASE64": {"display": "CIRCL.lu PDNS", "public": False},
"CIT0DAY_API_KEY": {"display": "Cit0day", "public": False},
"SEON_API_KEY": {"display": "SEON Email Intelligence", "public": False},
"CRIMINALIP_API_KEY": {"display": "CriminalIP", "public": False},
"DEHASHED_AUTH_BASE64": {"display": "Dehashed", "public": False},
@@ -108,7 +106,6 @@ SERVICE_REGISTRY: Dict[str, Dict] = {
"JOE_API_KEY": {"display": "Joe Sandbox", "public": False},
"LEAKCHECK_API_KEY": {"display": "LeakCheck", "public": False},
"LEAKIX_API_KEY": {"display": "LeakIX", "public": False},
"LEAKSTATS_API_KEY": {"display": "LeakStats.pw", "public": False},
"MAILBOX_API_KEY": {"display": "Mailboxlayer", "public": False},
"MALSHARE_API_KEY": {"display": "MalShare", "public": False},
"METADEFENDER_API_KEY": {"display": "MetaDefender", "public": False},
@@ -124,7 +121,6 @@ SERVICE_REGISTRY: Dict[str, Dict] = {
"SNUSBASE_API_KEY": {"display": "Snusbase", "public": False},
"SPYCLOUD_API_KEY": {"display": "SpyCloud", "public": False},
"SPYONWEB_API_KEY": {"display": "SpyOnWeb", "public": False},
"SPYSE_API_KEY": {"display": "Spyse", "public": False},
"TC_API_KEY": {"display": "ThreatConnect", "public": False},
"FLARE_API_KEY": {"display": "Flare LeaksDB", "public": False},
"TP_API_KEY": {"display": "ThreatPortal", "public": False},
@@ -138,7 +134,6 @@ SERVICE_REGISTRY: Dict[str, Dict] = {
"WHOXY_API_KEY": {"display": "Whoxy WHOIS", "public": False},
"ZEROBOUNCE_API_KEY": {"display": "ZeroBounce", "public": False},
"ZOOMEYE_API_KEY": {"display": "ZoomEye", "public": False},
# ── Added in v1.0.1 ───────────────────────────────────────────────
"EMAILREP_API_KEY": {"display": "EmailRep.io", "public": False},
"HASHES_COM_API_KEY": {"display": "Hashes.com (crack API)", "public": False},
"THREATFOX_API_KEY": {"display": "ThreatFox (abuse.ch)", "public": False},
@@ -146,8 +141,8 @@ SERVICE_REGISTRY: Dict[str, Dict] = {
"MALWAREBAZAAR_API_KEY": {"display": "MalwareBazaar (abuse.ch)", "public": False},
"FULLHUNT_API_KEY": {"display": "FullHunt (attack surface)", "public": False},
"NETLAS_API_KEY": {"display": "Netlas.io (internet scanner)", "public": False},
# ── Added in v1.0.2 ───────────────────────────────────────────────
"LEAK_LOOKUP_API_KEY": {"display": "Leak-Lookup", "public": False},
"MISP_URL": {"display": "MISP Instance URL", "public": False},
}
_PRIVATE_KEYS = {k: v for k, v in SERVICE_REGISTRY.items() if not v["public"]}
+19 -5
View File
@@ -12,9 +12,9 @@ import logging
import re
from typing import List, Optional, Tuple
# C2: MD5 and NTLM share the same 32-char hex pattern.
# We list md5 first (most common in breach data) but also accept ntlm
# so callers can query NTLM-specific APIs when needed.
# MD5 and NTLM share the same 32-char hex pattern. MD5 is listed first as it
# is the most common type in breach data. async_crack queries both md5 and
# ntlm-specific APIs for any 32-char hash.
_PATTERNS: List[Tuple[str, re.Pattern]] = [
("bcrypt", re.compile(r"^\$2[aby]?\$\d{2}\$.{53}$")),
("sha256", re.compile(r"^[a-f0-9]{64}$", re.I)),
@@ -130,9 +130,23 @@ def _local_crack_sync_blocking(hash_value: str, hash_type: str) -> Optional[str]
if not wordlist.exists():
return None
h = hash_value.strip().lower()
# usedforsecurity=False is required on FIPS-enabled systems (Python 3.9+).
# On Python 3.8 the kwarg does not exist, so we fall back gracefully.
def _md5(w):
try:
return _hl.md5(w, usedforsecurity=False).hexdigest()
except TypeError:
return _hl.md5(w).hexdigest()
def _sha1(w):
try:
return _hl.sha1(w, usedforsecurity=False).hexdigest()
except TypeError:
return _hl.sha1(w).hexdigest()
_hashers = {
"md5": lambda w: _hl.md5(w).hexdigest(),
"sha1": lambda w: _hl.sha1(w).hexdigest(),
"md5": _md5,
"sha1": _sha1,
"sha256": lambda w: _hl.sha256(w).hexdigest(),
}
hasher = _hashers.get(hash_type)
+4 -7
View File
@@ -48,11 +48,11 @@ def _raw(v: Any, maxlen: int = 200) -> str:
def _pdf_safe(s: str, maxlen: int = 180) -> str:
# D4: sanitize for fpdf2 core fonts (latin-1 subset).
# Sanitise for fpdf2 core fonts (latin-1 subset).
# NFKD normalization decomposes accented chars (é→e + combining accent)
# so common accented Latin characters survive as their base letter.
# Truly non-latin-1 chars (Cyrillic, CJK, etc.) become '?' — intentional:
# fpdf2 core fonts cannot render them and would raise UnicodeEncodeError.
# Truly non-latin-1 chars (Cyrillic, CJK, etc.) become '?' — fpdf2 core
# fonts cannot render them and would raise UnicodeEncodeError.
s = _raw(s, maxlen)
try:
import unicodedata
@@ -114,7 +114,7 @@ def render_pivot_chain(data: dict) -> List[str]:
chain = data.get("pivot_chain") or []
target = _raw(data.get("target", "?"))
# D2: if pivot_log is available, build chain from it (accurate tree)
# Build chain from pivot_log when available — it carries the full tree with depth and provenance.
pivot_log = data.get("pivot_log") or []
if pivot_log:
lines: List[str] = []
@@ -195,14 +195,12 @@ def to_json(data: dict, path: str) -> None:
dork_results = data.get("dork_results", []) or []
scrape_results = data.get("scrape_results", {}) or {}
# D3: apply consistent cap (1000) — same as HTML
_RECORD_CAP = 1000
out_data = {
"framework": f"NOX v{_NOX_VERSION}",
"generated": datetime.now().isoformat(),
"target": data.get("target", ""),
# J3: self-describing metadata block
"_meta": {
"scan_id": hashlib.sha256(
f"{data.get('target','')}{datetime.now().isoformat()}".encode()
@@ -387,7 +385,6 @@ def to_html(data: dict, path: str) -> None:
# ── PDF report (fpdf2) ────────────────────────────────────────────────
def to_pdf(data: dict, path: str, investigator_id: str = "NOX-AUTO") -> None:
# D1: raise a clear error with install hint if fpdf2 is absent — never silently return.
try:
from fpdf import FPDF # type: ignore
except ImportError:
+9 -18
View File
@@ -31,7 +31,6 @@ _PIVOT_TYPES = {"email", "username", "phone", "name", "ip", "domain"}
def _cfg_depth(orc=None) -> int:
# A7/A10: read from orchestrator config if available
if orc is not None:
cfg = getattr(orc, "config", None)
if cfg is not None:
@@ -46,7 +45,6 @@ def _cfg_depth(orc=None) -> int:
def _cfg_concurrency(orc=None) -> int:
# A7: read from orchestrator config if available
if orc is not None:
cfg = getattr(orc, "config", None)
if cfg is not None:
@@ -137,29 +135,24 @@ class AvalancheScanner:
def __init__(self, orchestrator: "Orchestrator") -> None:
# Set up the mutable state used by this scanner; nothing is started here.
self._orc = orchestrator
# Asset keys checked by _process before an asset is handled.
self.seen_assets: Set[str] = set()
# Concurrency semaphore: left as None here and built on first use by _get_sem().
self._sem: Optional[asyncio.Semaphore] = None
self._all_records: List = []
self._dork_hits: List[dict] = []
self._seen_dork_urls: Set[str] = set()
# Shared per-category scrape buckets; per-asset results are collected
# locally first and then extended into these lists.
self._scrape_hits: Dict = {"pastes": [], "credentials": [], "hashes": [],
"telegram": [], "dork_misconfigs": []}
self._max_depth: int = 0
# Futures keyed by asset key; _process consults this when a key is already in seen_assets.
self._in_flight: Dict[str, asyncio.Future] = {}
# One entry is appended per processed node.
self.pivot_log: List[dict] = []
# Keys already appended to discovered_assets, so the same child is not listed twice.
self._seen_discovered: Set[str] = set()
self.discovered_assets: List[dict] = []
def _get_sem(self) -> asyncio.Semaphore:
    """Return the scanner's shared semaphore, building it on first use.

    The limit is taken from ``_cfg_concurrency(self._orc)``. Once created,
    the same instance is stored on ``self._sem`` and returned by every
    later call.
    """
    sem = self._sem
    if sem is not None:
        return sem
    sem = self._sem = asyncio.Semaphore(_cfg_concurrency(self._orc))
    return sem
async def run(self, target: str) -> tuple:
# A9: respect no_pivot flag from config
cfg = getattr(self._orc, "config", None)
no_pivot = getattr(cfg, "no_pivot", False) if cfg else False
if no_pivot:
@@ -196,7 +189,6 @@ class AvalancheScanner:
async def _process(self, asset: str, depth: int,
parent: Optional[str], found_in: str) -> None:
"""Dedup gate: ensures each asset is processed exactly once."""
# A10: use per-run depth from orchestrator config
if depth > _cfg_depth(self._orc):
_syslog.debug("avalanche depth cap reached for %s", asset)
return
@@ -205,7 +197,7 @@ class AvalancheScanner:
if not key:
return
# A1: add to seen_assets FIRST (atomic gate) before any other check.
# Add to seen_assets before any await to prevent concurrent duplicates.
# If already present, wait on the in-flight future if one exists, then return.
if key in self.seen_assets:
if key in self._in_flight:
@@ -326,7 +318,8 @@ class AvalancheScanner:
_syslog.warning("SCRAPE_FAIL asset=%s err=%s", asset, exc)
scrape_res = {}
# A6: collect scrape results locally, then merge atomically
# Collect scrape results locally then merge into the shared dict.
# The event loop is single-threaded so the merge is safe without a lock.
scrape_count = 0
local_scrape: Dict = {k: [] for k in self._scrape_hits}
for k in self._scrape_hits:
@@ -336,7 +329,7 @@ class AvalancheScanner:
item["pivot_depth"] = depth
local_scrape[k].append(item)
scrape_count += 1
# Atomic merge into shared dict (single-threaded event loop — safe)
# Merge into shared dict — safe within the single-threaded event loop.
for k, items in local_scrape.items():
self._scrape_hits[k].extend(items)
_out("ok" if scrape_count else "dim",
@@ -393,7 +386,6 @@ class AvalancheScanner:
queued.add(child_key)
child_entry = {"asset": val, "qtype": vqtype, "found_in": phase, "ref": ref}
children.append(child_entry)
# A8: prevent duplicate entries in discovered_assets across parallel parents
if child_key not in self._seen_discovered:
self._seen_discovered.add(child_key)
self.discovered_assets.append({
@@ -412,12 +404,12 @@ class AvalancheScanner:
self._process(val, depth + 1, parent=asset, found_in=phase)
)
# A5: run child tasks FIRST, then append pivot_log so the log reflects actual outcomes
# Run child tasks before appending to pivot_log so the log reflects actual outcomes.
if child_tasks:
_out("info", f"{indent} → reinjecting {len(child_tasks)} new asset(s)…")
await asyncio.gather(*child_tasks, return_exceptions=True)
# ── Log this node (after children complete — A5) ──────────────
# ── Log this node ─────────────────────────────────────────────
self.pivot_log.append({
"asset": asset,
"qtype": qtype,
@@ -461,8 +453,8 @@ class AvalancheScanner:
# ── Scrape dispatcher ─────────────────────────────────────────────
async def _async_scrape(self, asset: str) -> dict:
# A3: instantiate a fresh Session + ScrapeEngine per call to avoid sharing
# a non-thread-safe requests.Session / cloudscraper across concurrent coroutines.
# Instantiate a fresh Session and ScrapeEngine per call — requests.Session
# and cloudscraper are not safe to share across concurrent coroutines.
_empty: dict = {"pastes": [], "credentials": [], "hashes": [],
"telegram": [], "dork_misconfigs": []}
try:
@@ -517,8 +509,7 @@ async def _crack_and_inject(session, hash_value: str, record_ref,
_out("ok", f" [crack] {hash_value[:16]}… → {plaintext} (from {parent_asset})")
cracked_out.append(plaintext)
# A4: inject cracked plaintext as qtype="password" — NOT as username.
# Only pivot on it if sources support password-recycling queries.
# Inject the cracked plaintext as a password-recycling pivot seed.
key = plaintext.lower()
if key not in seen_assets and depth + 1 <= _cfg_depth(scanner._orc):
await scanner._process(plaintext, depth + 1,