mirror of
https://github.com/nox-project/nox-framework.git
synced 2026-06-08 16:07:17 +00:00
release: v1.0.2
- 124 sources (+1 xposedornot, bgpview replaced with ripestat)
- Fix gravatar MD5 transform, fofa base64 query encoding
- Fix misp_search URL resolution, threatconnect HMAC placeholder
- Fix spycloud, duckduckgo, mailboxlayer/numverify/ipstack/ipinfodb endpoints
- Fix DeHashEngine v1→v2, DorkEngine engine label, backup_endpoints consumed
- Fix Retry-After HTTP-date parsing, Hashmob API schema, FIPS hashlib crash
- Fix DB.close() event loop leak, _random_headers CH-UA override
- Add query_transform mechanism (md5_lower, fofa_domain)
- Lower scores: spyonweb, pipl_search, twitter_v2, hudsonrock rate_limit
- Clean all internal tracking comments, fix Italian docstring
This commit is contained in:
@@ -29,13 +29,13 @@ SERVICE_REGISTRY: Dict[str, Dict] = {
|
||||
"alienvault_otx_malware": {"display": "AlienVault OTX (Malware)", "public": True},
|
||||
"alienvault_otx_user": {"display": "AlienVault OTX (User)", "public": True},
|
||||
"anubis_subdomains": {"display": "Anubis Subdomains", "public": True},
|
||||
"bgpview_ip": {"display": "BGPView IP", "public": True},
|
||||
"checkleaked": {"display": "CheckLeaked", "public": True},
|
||||
"ripestat_ip": {"display": "RIPE Stat IP", "public": True},
|
||||
"xposedornot": {"display": "XposedOrNot", "public": True},
|
||||
"crt_sh": {"display": "crt.sh", "public": True},
|
||||
"cve_search": {"display": "CVE Search", "public": True},
|
||||
"cxsecurity": {"display": "CXSecurity", "public": True},
|
||||
"duckduckgo_api": {"display": "Google / DDG Dorks", "public": True},
|
||||
"emailrep_io": {"display": "EmailRep.io", "public": True},
|
||||
"emailrep_io": {"display": "EmailRep.io", "public": False},
|
||||
"github_users": {"display": "GitHub Users", "public": True},
|
||||
"gitlab_search": {"display": "GitLab Search", "public": True},
|
||||
"gravatar": {"display": "Gravatar", "public": True},
|
||||
@@ -44,7 +44,10 @@ SERVICE_REGISTRY: Dict[str, Dict] = {
|
||||
"hackertarget_hostsearch": {"display": "HackerTarget Host Search", "public": True},
|
||||
"hackertarget_reverseip": {"display": "HackerTarget Reverse IP", "public": True},
|
||||
"hackertarget_whois": {"display": "WHOIS (HackerTarget)", "public": True},
|
||||
"hudsonrock_osint": {"display": "HudsonRock OSINT", "public": True},
|
||||
"ipapi_is": {"display": "ipapi.is", "public": True},
|
||||
"circl_hashlookup": {"display": "CIRCL Hash Lookup", "public": True},
|
||||
"proxynova_comb": {"display": "ProxyNova COMB", "public": True},
|
||||
"shodan_internetdb": {"display": "Shodan InternetDB", "public": True},
|
||||
"ipapi_co": {"display": "ipapi.co", "public": True},
|
||||
"ipinfo_io": {"display": "IPInfo.io", "public": True},
|
||||
"ipvigilante": {"display": "IPVigilante", "public": True},
|
||||
@@ -59,14 +62,10 @@ SERVICE_REGISTRY: Dict[str, Dict] = {
|
||||
"reddit_user": {"display": "Reddit User", "public": True},
|
||||
"robtex_ip": {"display": "Robtex IP", "public": True},
|
||||
"scamwatcher": {"display": "ScamWatcher", "public": True},
|
||||
"social_scan": {"display": "Social Scan", "public": True},
|
||||
"sublist3r_api": {"display": "Sublist3r API", "public": True},
|
||||
"threatcrowd_domain": {"display": "ThreatCrowd (Domain)", "public": True},
|
||||
"threatcrowd_email": {"display": "ThreatCrowd (Email)", "public": True},
|
||||
"threatminer_domain": {"display": "ThreatMiner (Domain)", "public": True},
|
||||
"threatminer_ip": {"display": "ThreatMiner (IP)", "public": True},
|
||||
"urlscan_search": {"display": "URLScan.io", "public": True},
|
||||
"vigilante_pw": {"display": "Vigilante.pw", "public": True},
|
||||
"wayback_machine": {"display": "Wayback Machine", "public": True},
|
||||
# ── Private / key-required ────────────────────────────────────────
|
||||
"ABSTRACT_API_KEY": {"display": "Abstract Email Validation", "public": False},
|
||||
@@ -78,7 +77,6 @@ SERVICE_REGISTRY: Dict[str, Dict] = {
|
||||
"BING_API_KEY": {"display": "Bing Search API", "public": False},
|
||||
"CENSYS_AUTH_BASE64": {"display": "Censys", "public": False},
|
||||
"CIRCL_AUTH_BASE64": {"display": "CIRCL.lu PDNS", "public": False},
|
||||
"CIT0DAY_API_KEY": {"display": "Cit0day", "public": False},
|
||||
"SEON_API_KEY": {"display": "SEON Email Intelligence", "public": False},
|
||||
"CRIMINALIP_API_KEY": {"display": "CriminalIP", "public": False},
|
||||
"DEHASHED_AUTH_BASE64": {"display": "Dehashed", "public": False},
|
||||
@@ -108,7 +106,6 @@ SERVICE_REGISTRY: Dict[str, Dict] = {
|
||||
"JOE_API_KEY": {"display": "Joe Sandbox", "public": False},
|
||||
"LEAKCHECK_API_KEY": {"display": "LeakCheck", "public": False},
|
||||
"LEAKIX_API_KEY": {"display": "LeakIX", "public": False},
|
||||
"LEAKSTATS_API_KEY": {"display": "LeakStats.pw", "public": False},
|
||||
"MAILBOX_API_KEY": {"display": "Mailboxlayer", "public": False},
|
||||
"MALSHARE_API_KEY": {"display": "MalShare", "public": False},
|
||||
"METADEFENDER_API_KEY": {"display": "MetaDefender", "public": False},
|
||||
@@ -124,7 +121,6 @@ SERVICE_REGISTRY: Dict[str, Dict] = {
|
||||
"SNUSBASE_API_KEY": {"display": "Snusbase", "public": False},
|
||||
"SPYCLOUD_API_KEY": {"display": "SpyCloud", "public": False},
|
||||
"SPYONWEB_API_KEY": {"display": "SpyOnWeb", "public": False},
|
||||
"SPYSE_API_KEY": {"display": "Spyse", "public": False},
|
||||
"TC_API_KEY": {"display": "ThreatConnect", "public": False},
|
||||
"FLARE_API_KEY": {"display": "Flare LeaksDB", "public": False},
|
||||
"TP_API_KEY": {"display": "ThreatPortal", "public": False},
|
||||
@@ -138,7 +134,6 @@ SERVICE_REGISTRY: Dict[str, Dict] = {
|
||||
"WHOXY_API_KEY": {"display": "Whoxy WHOIS", "public": False},
|
||||
"ZEROBOUNCE_API_KEY": {"display": "ZeroBounce", "public": False},
|
||||
"ZOOMEYE_API_KEY": {"display": "ZoomEye", "public": False},
|
||||
# ── Added in v1.0.1 ───────────────────────────────────────────────
|
||||
"EMAILREP_API_KEY": {"display": "EmailRep.io", "public": False},
|
||||
"HASHES_COM_API_KEY": {"display": "Hashes.com (crack API)", "public": False},
|
||||
"THREATFOX_API_KEY": {"display": "ThreatFox (abuse.ch)", "public": False},
|
||||
@@ -146,8 +141,8 @@ SERVICE_REGISTRY: Dict[str, Dict] = {
|
||||
"MALWAREBAZAAR_API_KEY": {"display": "MalwareBazaar (abuse.ch)", "public": False},
|
||||
"FULLHUNT_API_KEY": {"display": "FullHunt (attack surface)", "public": False},
|
||||
"NETLAS_API_KEY": {"display": "Netlas.io (internet scanner)", "public": False},
|
||||
# ── Added in v1.0.2 ───────────────────────────────────────────────
|
||||
"LEAK_LOOKUP_API_KEY": {"display": "Leak-Lookup", "public": False},
|
||||
"MISP_URL": {"display": "MISP Instance URL", "public": False},
|
||||
}
|
||||
|
||||
# Subset of SERVICE_REGISTRY whose entries have a falsy "public" flag.
_PRIVATE_KEYS = {
    name: meta
    for name, meta in SERVICE_REGISTRY.items()
    if not meta["public"]
}
|
||||
|
||||
@@ -12,9 +12,9 @@ import logging
|
||||
import re
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
# C2: MD5 and NTLM share the same 32-char hex pattern.
|
||||
# We list md5 first (most common in breach data) but also accept ntlm
|
||||
# so callers can query NTLM-specific APIs when needed.
|
||||
# MD5 and NTLM share the same 32-char hex pattern. MD5 is listed first as it
|
||||
# is the most common type in breach data. async_crack queries both md5 and
|
||||
# ntlm-specific APIs for any 32-char hash.
|
||||
_PATTERNS: List[Tuple[str, re.Pattern]] = [
|
||||
("bcrypt", re.compile(r"^\$2[aby]?\$\d{2}\$.{53}$")),
|
||||
("sha256", re.compile(r"^[a-f0-9]{64}$", re.I)),
|
||||
@@ -130,9 +130,23 @@ def _local_crack_sync_blocking(hash_value: str, hash_type: str) -> Optional[str]
|
||||
if not wordlist.exists():
|
||||
return None
|
||||
h = hash_value.strip().lower()
|
||||
# usedforsecurity=False is required on FIPS-enabled systems (Python 3.9+).
|
||||
# On Python 3.8 the kwarg does not exist, so we fall back gracefully.
|
||||
def _md5(w):
|
||||
try:
|
||||
return _hl.md5(w, usedforsecurity=False).hexdigest()
|
||||
except TypeError:
|
||||
return _hl.md5(w).hexdigest()
|
||||
|
||||
def _sha1(w):
|
||||
try:
|
||||
return _hl.sha1(w, usedforsecurity=False).hexdigest()
|
||||
except TypeError:
|
||||
return _hl.sha1(w).hexdigest()
|
||||
|
||||
_hashers = {
|
||||
"md5": lambda w: _hl.md5(w).hexdigest(),
|
||||
"sha1": lambda w: _hl.sha1(w).hexdigest(),
|
||||
"md5": _md5,
|
||||
"sha1": _sha1,
|
||||
"sha256": lambda w: _hl.sha256(w).hexdigest(),
|
||||
}
|
||||
hasher = _hashers.get(hash_type)
|
||||
|
||||
@@ -48,11 +48,11 @@ def _raw(v: Any, maxlen: int = 200) -> str:
|
||||
|
||||
|
||||
def _pdf_safe(s: str, maxlen: int = 180) -> str:
|
||||
# D4: sanitize for fpdf2 core fonts (latin-1 subset).
|
||||
# Sanitise for fpdf2 core fonts (latin-1 subset).
|
||||
# NFKD normalization decomposes accented chars (é→e + combining accent)
|
||||
# so common accented Latin characters survive as their base letter.
|
||||
# Truly non-latin-1 chars (Cyrillic, CJK, etc.) become '?' — intentional:
|
||||
# fpdf2 core fonts cannot render them and would raise UnicodeEncodeError.
|
||||
# Truly non-latin-1 chars (Cyrillic, CJK, etc.) become '?' — fpdf2 core
|
||||
# fonts cannot render them and would raise UnicodeEncodeError.
|
||||
s = _raw(s, maxlen)
|
||||
try:
|
||||
import unicodedata
|
||||
@@ -114,7 +114,7 @@ def render_pivot_chain(data: dict) -> List[str]:
|
||||
chain = data.get("pivot_chain") or []
|
||||
target = _raw(data.get("target", "?"))
|
||||
|
||||
# D2: if pivot_log is available, build chain from it (accurate tree)
|
||||
# Build chain from pivot_log when available — it carries the full tree with depth and provenance.
|
||||
pivot_log = data.get("pivot_log") or []
|
||||
if pivot_log:
|
||||
lines: List[str] = []
|
||||
@@ -195,14 +195,12 @@ def to_json(data: dict, path: str) -> None:
|
||||
dork_results = data.get("dork_results", []) or []
|
||||
scrape_results = data.get("scrape_results", {}) or {}
|
||||
|
||||
# D3: apply consistent cap (1000) — same as HTML
|
||||
_RECORD_CAP = 1000
|
||||
|
||||
out_data = {
|
||||
"framework": f"NOX v{_NOX_VERSION}",
|
||||
"generated": datetime.now().isoformat(),
|
||||
"target": data.get("target", ""),
|
||||
# J3: self-describing metadata block
|
||||
"_meta": {
|
||||
"scan_id": hashlib.sha256(
|
||||
f"{data.get('target','')}{datetime.now().isoformat()}".encode()
|
||||
@@ -387,7 +385,6 @@ def to_html(data: dict, path: str) -> None:
|
||||
# ── PDF report (fpdf2) ────────────────────────────────────────────────
|
||||
|
||||
def to_pdf(data: dict, path: str, investigator_id: str = "NOX-AUTO") -> None:
|
||||
# D1: raise a clear error with install hint if fpdf2 is absent — never silently return.
|
||||
try:
|
||||
from fpdf import FPDF # type: ignore
|
||||
except ImportError:
|
||||
|
||||
@@ -31,7 +31,6 @@ _PIVOT_TYPES = {"email", "username", "phone", "name", "ip", "domain"}
|
||||
|
||||
|
||||
def _cfg_depth(orc=None) -> int:
|
||||
# A7/A10: read from orchestrator config if available
|
||||
if orc is not None:
|
||||
cfg = getattr(orc, "config", None)
|
||||
if cfg is not None:
|
||||
@@ -46,7 +45,6 @@ def _cfg_depth(orc=None) -> int:
|
||||
|
||||
|
||||
def _cfg_concurrency(orc=None) -> int:
|
||||
# A7: read from orchestrator config if available
|
||||
if orc is not None:
|
||||
cfg = getattr(orc, "config", None)
|
||||
if cfg is not None:
|
||||
@@ -137,29 +135,24 @@ class AvalancheScanner:
|
||||
def __init__(self, orchestrator: "Orchestrator") -> None:
|
||||
self._orc = orchestrator
|
||||
self.seen_assets: Set[str] = set()
|
||||
# A2: single semaphore for the entire run, created lazily inside the event loop
|
||||
self._sem: Optional[asyncio.Semaphore] = None
|
||||
self._all_records: List = []
|
||||
self._dork_hits: List[dict] = []
|
||||
self._seen_dork_urls: Set[str] = set()
|
||||
# A6: scrape_hits merged atomically per _do_process call
|
||||
self._scrape_hits: Dict = {"pastes": [], "credentials": [], "hashes": [],
|
||||
"telegram": [], "dork_misconfigs": []}
|
||||
self._max_depth: int = 0
|
||||
self._in_flight: Dict[str, asyncio.Future] = {}
|
||||
self.pivot_log: List[dict] = []
|
||||
# A8: global set to prevent duplicate entries in discovered_assets
|
||||
self._seen_discovered: Set[str] = set()
|
||||
self.discovered_assets: List[dict] = []
|
||||
|
||||
def _get_sem(self) -> asyncio.Semaphore:
|
||||
# A2: semaphore created once per run, shared across all coroutines
|
||||
if self._sem is None:
|
||||
self._sem = asyncio.Semaphore(_cfg_concurrency(self._orc))
|
||||
return self._sem
|
||||
|
||||
async def run(self, target: str) -> tuple:
|
||||
# A9: respect no_pivot flag from config
|
||||
cfg = getattr(self._orc, "config", None)
|
||||
no_pivot = getattr(cfg, "no_pivot", False) if cfg else False
|
||||
if no_pivot:
|
||||
@@ -196,7 +189,6 @@ class AvalancheScanner:
|
||||
async def _process(self, asset: str, depth: int,
|
||||
parent: Optional[str], found_in: str) -> None:
|
||||
"""Dedup gate: ensures each asset is processed exactly once."""
|
||||
# A10: use per-run depth from orchestrator config
|
||||
if depth > _cfg_depth(self._orc):
|
||||
_syslog.debug("avalanche depth cap reached for %s", asset)
|
||||
return
|
||||
@@ -205,7 +197,7 @@ class AvalancheScanner:
|
||||
if not key:
|
||||
return
|
||||
|
||||
# A1: add to seen_assets FIRST (atomic gate) before any other check.
|
||||
# Add to seen_assets before any await to prevent concurrent duplicates.
|
||||
# If already present, wait on the in-flight future if one exists, then return.
|
||||
if key in self.seen_assets:
|
||||
if key in self._in_flight:
|
||||
@@ -326,7 +318,8 @@ class AvalancheScanner:
|
||||
_syslog.warning("SCRAPE_FAIL asset=%s err=%s", asset, exc)
|
||||
scrape_res = {}
|
||||
|
||||
# A6: collect scrape results locally, then merge atomically
|
||||
# Collect scrape results locally then merge into the shared dict.
|
||||
# The event loop is single-threaded so the merge is safe without a lock.
|
||||
scrape_count = 0
|
||||
local_scrape: Dict = {k: [] for k in self._scrape_hits}
|
||||
for k in self._scrape_hits:
|
||||
@@ -336,7 +329,7 @@ class AvalancheScanner:
|
||||
item["pivot_depth"] = depth
|
||||
local_scrape[k].append(item)
|
||||
scrape_count += 1
|
||||
# Atomic merge into shared dict (single-threaded event loop — safe)
|
||||
# Merge into shared dict — safe within the single-threaded event loop.
|
||||
for k, items in local_scrape.items():
|
||||
self._scrape_hits[k].extend(items)
|
||||
_out("ok" if scrape_count else "dim",
|
||||
@@ -393,7 +386,6 @@ class AvalancheScanner:
|
||||
queued.add(child_key)
|
||||
child_entry = {"asset": val, "qtype": vqtype, "found_in": phase, "ref": ref}
|
||||
children.append(child_entry)
|
||||
# A8: prevent duplicate entries in discovered_assets across parallel parents
|
||||
if child_key not in self._seen_discovered:
|
||||
self._seen_discovered.add(child_key)
|
||||
self.discovered_assets.append({
|
||||
@@ -412,12 +404,12 @@ class AvalancheScanner:
|
||||
self._process(val, depth + 1, parent=asset, found_in=phase)
|
||||
)
|
||||
|
||||
# A5: run child tasks FIRST, then append pivot_log so the log reflects actual outcomes
|
||||
# Run child tasks before appending to pivot_log so the log reflects actual outcomes.
|
||||
if child_tasks:
|
||||
_out("info", f"{indent} → reinjecting {len(child_tasks)} new asset(s)…")
|
||||
await asyncio.gather(*child_tasks, return_exceptions=True)
|
||||
|
||||
# ── Log this node (after children complete — A5) ──────────────
|
||||
# ── Log this node ─────────────────────────────────────────────
|
||||
self.pivot_log.append({
|
||||
"asset": asset,
|
||||
"qtype": qtype,
|
||||
@@ -461,8 +453,8 @@ class AvalancheScanner:
|
||||
# ── Scrape dispatcher ─────────────────────────────────────────────
|
||||
|
||||
async def _async_scrape(self, asset: str) -> dict:
|
||||
# A3: instantiate a fresh Session + ScrapeEngine per call to avoid sharing
|
||||
# a non-thread-safe requests.Session / cloudscraper across concurrent coroutines.
|
||||
# Instantiate a fresh Session and ScrapeEngine per call — requests.Session
|
||||
# and cloudscraper are not safe to share across concurrent coroutines.
|
||||
_empty: dict = {"pastes": [], "credentials": [], "hashes": [],
|
||||
"telegram": [], "dork_misconfigs": []}
|
||||
try:
|
||||
@@ -517,8 +509,7 @@ async def _crack_and_inject(session, hash_value: str, record_ref,
|
||||
_out("ok", f" [crack] {hash_value[:16]}… → {plaintext} (from {parent_asset})")
|
||||
cracked_out.append(plaintext)
|
||||
|
||||
# A4: inject cracked plaintext as qtype="password" — NOT as username.
|
||||
# Only pivot on it if sources support password-recycling queries.
|
||||
# Inject the cracked plaintext as a password-recycling pivot seed.
|
||||
key = plaintext.lower()
|
||||
if key not in seen_assets and depth + 1 <= _cfg_depth(scanner._orc):
|
||||
await scanner._process(plaintext, depth + 1,
|
||||
|
||||
Reference in New Issue
Block a user