diff --git a/.gitignore b/.gitignore index d140886..53570ae 100644 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,3 @@ Thumbs.db # Env files .env .env.* - -# Internal development notes — not for distribution -IMPROVEMENTS.md diff --git a/CHANGELOG.md b/CHANGELOG.md index bec60be..a73b6a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,42 @@ All notable changes to NOX are documented here. +## [1.0.1] — 2026-04-11 + +### Sources +- **Added 9 new sources:** `proxynova_comb` (COMB breach search, free), `shodan_internetdb` (IP intel, free), `circl_hashlookup` (NSRL hash lookup, free), `ipapi_is` (IP geolocation, free), `threatfox` (abuse.ch IOC database), `urlhaus` (abuse.ch malware URLs), `malwarebazaar` (abuse.ch hash lookup), `fullhunt_subdomains` (attack surface), `netlas_search` (internet scanner) +- **Removed 7 dead sources:** `threatcrowd_email`, `threatcrowd_domain` (DNS dead), `spyse_domain`, `spyse_ip` (API shut down), `hashes_org` (DNS dead), `leakstats_pw` (DNS dead), `checkleaked` (endpoint gone) +- **Fixed:** `dehashed` endpoint migrated from `/search` to `/v2/search` +- **Fixed:** `hudsonrock_osint` endpoint corrected to `search-by-email` with `input_type: email` (was `search-by-login` with `input_type: username`) +- **Fixed:** `scylla_sh_search` migrated from dead `scylla.sh` to active `scylla.so` +- **Fixed:** `emailrep_io` now requires API key (`EMAILREP_API_KEY`) — free unauthenticated tier removed by provider +- **Fixed:** `duckduckgo_api` repurposed from dead DDG Instant Answer API to SearXNG JSON search + +### Engine +- **Fixed:** POST 429 `Retry-After` cap raised from 4s to 30s, matching the GET path +- **Fixed:** Linear retry backoff replaced with exponential backoff + jitter in all 4 retry paths (`_get`, `_post`, `Session.get`, `Session.post`) +- **Fixed:** `--reset-sources` now removes orphaned plugins from `~/.nox/sources/` in addition to copying new ones +- **Fixed:** DDG HTML scraper replaced with SearXNG JSON API across all call sites 
— DDG HTML endpoint bot-blocked since 2025 +- **Fixed:** SearXNG instance pool updated: `searx.be` (403), `search.bus-hit.me` (DNS dead), `searxng.site` (SSL error) replaced with 6 active instances; pool extracted to module-level `_SEARX_INSTANCES` constant +- **Fixed:** All 11 dead paste site APIs removed from `ScrapeEngine.PASTE_SITES`; paste intelligence now routed through SearXNG dorks and IntelX + +### WAF Resilience +- **Updated:** User-Agent pool updated to Chrome/135, Firefox/136, Edge/135 (was Chrome/131, Firefox/133) +- **Added:** `Sec-CH-UA`, `Sec-CH-UA-Mobile`, `Sec-CH-UA-Platform` Client Hints headers for Chromium-based UAs +- **Fixed:** `_CH_UA_MAP` ordering — Edge UA now correctly gets `"Microsoft Edge"` brand (was getting `"Google Chrome"` due to dict iteration order) +- **Fixed:** `_search()` sync method no longer passes `use_cloudscraper=True` to SearXNG JSON API calls + +### Hash Cracking +- **Removed 6 dead/paywalled cracker APIs:** nitrxgen (DNS dead), hash.help (DNS dead), hashkiller (403), hashes.com free path (404), md5decrypt (403), cmd5 (paywalled — returns `CMD5-ERROR:-1` for all hashes) +- **Added:** Local rockyou wordlist as primary crack path (no external calls, no rate limits, no data leakage) +- **Added:** `hashes.com` keyed API as external fallback (`HASHES_COM_API_KEY`) + +### Dependencies +- **Added:** `brotli>=1.1.0` — required for aiohttp to decompress `br`-encoded responses + +### Config +- **Added 7 new API key slots:** `EMAILREP_API_KEY`, `HASHES_COM_API_KEY`, `THREATFOX_API_KEY`, `URLHAUS_API_KEY`, `MALWAREBAZAAR_API_KEY`, `FULLHUNT_API_KEY`, `NETLAS_API_KEY` + ## [1.0.0] — 2026-04-02 ### Initial Release diff --git a/README.md b/README.md index a766258..6b554f5 100644 --- a/README.md +++ b/README.md @@ -11,12 +11,13 @@ **Cyber Threat Intelligence Framework** -[![Status](https://img.shields.io/badge/Status-v1.0.0-success)](https://github.com/nox-project/nox-framework/releases/tag/v1.0.0) 
+[![Status](https://img.shields.io/badge/Status-v1.0.1-success)](https://github.com/nox-project/nox-framework/releases/tag/v1.0.1) [![Python](https://img.shields.io/badge/Python-3.8%2B-blue?logo=python&logoColor=white)](https://www.python.org/) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE.txt) [![Kali Linux](https://img.shields.io/badge/Kali%20Linux-Ready-557C94?logo=kalilinux&logoColor=white)](https://www.kali.org/) +[![BlackArch](https://img.shields.io/badge/BlackArch-Available-1E1E2E?logo=archlinux&logoColor=white)](https://blackarch.org/) [![Platform](https://img.shields.io/badge/Platform-Linux%20%7C%20macOS%20%7C%20Windows-lightgrey)](https://github.com/nox-project/nox-framework) -[![Sources](https://img.shields.io/badge/Sources-124-red)](https://github.com/nox-project/nox-framework) +[![Sources](https://img.shields.io/badge/Sources-126-red)](https://github.com/nox-project/nox-framework) *OSINT framework for red teaming, digital forensics, and corporate exposure analysis.* @@ -30,7 +31,7 @@ NOX is a purpose-built cyber threat intelligence engine designed for operators w | Capability | Detail | |-|-| -| ⚡ **Async Execution Engine** | Massively parallel scanning across 124 intelligence feeds with no sequential bottlenecks and no blocking I/O. | +| ⚡ **Async Execution Engine** | Massively parallel scanning across 126 intelligence feeds with no sequential bottlenecks and no blocking I/O. | | 🛡️ **Guardian Engine** | Integrated OPSEC layer with automatic proxy rotation and SOCKS5 support. Fail-safe kill-switch halts all traffic if the transport circuit is unavailable. | | 🧠 **Risk Scoring** | Dynamic 0–100 scoring with time-decay, source confidence weighting, password complexity analysis, persistence multipliers, and HVT detection. | | 🔗 **Recursive Avalanche Engine** | Every discovered asset — username, email, cracked password, phone — is automatically re-injected as a new scan seed. 
Per-asset pipeline runs sequentially (breach → crack → dork → scrape); child assets run concurrently. Identifiers from all four phases feed the pivot queue. Global deduplication and configurable depth cap prevent runaway recursion. | @@ -42,9 +43,9 @@ NOX is a purpose-built cyber threat intelligence engine designed for operators w | Feature | Description | |-|-| -| **124 JSON Plugin Sources** | Every intelligence source is a JSON plugin. The execution engine contains zero hardcoded source logic. | +| **126 JSON Plugin Sources** | Every intelligence source is a JSON plugin. The execution engine contains zero hardcoded source logic. | | **Async Core** | Full `asyncio` event loop with JA3 fingerprinting, SSL session management, per-request jitter, and configurable concurrency. | -| **Autoscan Pipeline** | `--autoscan` triggers: breach scan → recursive pivot → Google/Bing/DDG dorking → paste/Telegram scraping — all in one command. | +| **Autoscan Pipeline** | `--autoscan` triggers: breach scan → recursive pivot → Google/Bing/SearXNG dorking → paste/Telegram scraping — all in one command. | | **Recursive Avalanche Engine** | Every identifier discovered — from breach records, dork hits, or scraped paste/Telegram content — is re-injected as a new seed. Per-asset pipeline is sequential (breach → crack → dork → scrape); child assets run concurrently via `asyncio.gather`. A global `seen_assets` set prevents infinite loops. Concurrency and depth are fully configurable at runtime via `--threads` and `--depth`. | | **Hash Pivoting** | Hashes found in breach data are automatically identified (MD5/SHA1/SHA256/NTLM/bcrypt) and cracked via concurrent background API queries. Cracked plaintexts are injected into the pivot queue as password-recycling seeds. Failures are logged silently — the scan never stops. | | **Guardian Proxy Engine** | Zero-config OPSEC layer: reads `proxies.txt` if present; otherwise auto-fetches and validates a high-anonymity proxy pool in-memory. 
Full SOCKS5/HTTP/S and Tor support. | @@ -52,7 +53,7 @@ NOX is a purpose-built cyber threat intelligence engine designed for operators w | **Identity Graphing** | Union-Find correlation engine unifies breach records into identity clusters across all sources, using type-aware pivot classification. | | **Enterprise Forensic Reports** | Professional PDF/HTML/JSON/CSV/Markdown reports with Executive Summary dashboard (Total Time, Nodes Discovered, Cleartext Passwords, Pivot Depth), interactive Pivot Chain Visualization, and strict data sanitization — no technical noise in output. JSON exports are self-describing with a full metadata block. | | **HVT Detection** | Auto-flags C-level, Admin, DevOps, and government domain accounts as High-Value Targets. | -| **Dorking Engine** | Passive document discovery via Google/Bing/DDG dorks with PDF/Office metadata extraction. | +| **Dorking Engine** | Passive document discovery via Google/Bing/SearXNG dorks with PDF/Office metadata extraction. | | **Scraping Engine** | Paste site indexing, Telegram CTI channel monitoring, credential extraction, and misconfiguration discovery. Each autoscan asset gets a dedicated scrape session — no shared state. | | **Proxy / Tor** | SOCKS5, HTTP/S proxy, full Tor routing via `stem`, and automatic Guardian fallback. SOCKS5 proxies are validated and routed correctly via `aiohttp-socks`. | | **Secure Key Store** | API keys managed via `~/.config/nox-cli/apikeys.json` (chmod 0600). Unconfigured keys are silently skipped. Keys set via environment variable are picked up automatically without restarting. 
| @@ -107,14 +108,14 @@ Supported fields: `name`, `endpoint`, `method`, `headers`, `regex_pattern` (or ` ``` For each asset (seed + every discovered identifier): ├─ Phase 1 — Breach Scan - │ 124 sources queried in parallel (async) + │ 126 sources queried in parallel (async) │ ├─ Phase 2 — Hash Crack (non-blocking, concurrent) │ Hashes found in breach data → rainbow-table APIs → cracked plaintext │ → password-recycling breach scan │ ├─ Phase 3 — Dorking - │ Google/Bing/DDG dorks → leaked docs, .env files, SQL dumps + │ Google/Bing/SearXNG dorks → leaked docs, .env files, SQL dumps │ → new identifiers extracted and re-injected │ └─ Phase 4 — Scraping @@ -257,7 +258,7 @@ nox-cli --help The post-install script automatically: 1. Creates an isolated virtual environment at `/opt/nox-cli/.venv` 2. Installs all Python dependencies inside the venv (PEP 668 compliant — zero system pollution) -3. Builds the 124 source plugins +3. Builds the 126 source plugins 4. Links `/usr/bin/nox-cli` → `/opt/nox-cli/nox-wrapper.sh` ### Option 2: From Source @@ -373,7 +374,7 @@ usage: nox-cli [-h] [-t TARGET] [-i] [--version] --fullscan Breach + pivot only (no dork/scrape) --no-pivot Disable recursive pivot enrichment --depth N Avalanche pivot depth (default: 2) - --dork TARGET Google/Bing/DDG dorking for leaked documents + --dork TARGET Google/Bing/SearXNG dorking for leaked documents --scrape TARGET Paste site + Telegram scraping --crack HASH Identify and crack a hash --no-online-crack Local wordlist only — no data sent to third-party APIs @@ -406,7 +407,7 @@ Command Description ----------- --------------------------------------------------------------- autoscan Full pipeline: breach + pivot + dork + scrape scan Breach intelligence scan only -dork Google/Bing/DDG dorking for leaked documents +dork Google/Bing/SearXNG dorking for leaked documents scrape Paste site + Telegram scraping crack Identify and crack a hash analyze Deep password strength analysis diff --git a/build_sources.py 
b/build_sources.py index bed883e..97c28ae 100644 --- a/build_sources.py +++ b/build_sources.py @@ -240,24 +240,6 @@ FREE_PUBLIC_SOURCES: List[SourceConfig] = [ tags=["passive"], health_check_url="https://urlscan.io", reliability_score=5), - _base("threatcrowd_email", "threat_intel", - "https://www.threatcrowd.org/searchApi/v2/email/report/?email={target}", "GET", - {"domains": "$.domains"}, - rate_limit=5.0, - input_type="email", output_type=["domain"], - tags=["passive", "threat"], - health_check_url="https://www.threatcrowd.org", reliability_score=3, - is_volatile=True, bypass_required=["cloudflare"], user_agent_type="browser"), - - _base("threatcrowd_domain", "threat_intel", - "https://www.threatcrowd.org/searchApi/v2/domain/report/?domain={target}", "GET", - {"ips": "$.resolutions[*].ip_address"}, - rate_limit=5.0, - input_type="domain", output_type=["ip"], - tags=["passive", "threat"], - health_check_url="https://www.threatcrowd.org", reliability_score=3, - is_volatile=True, bypass_required=["cloudflare"], user_agent_type="browser"), - _base("pulsedive", "threat_intel", "https://pulsedive.com/api/info.php?indicator={target}", "GET", {"risk": "$.risk", "threats": "$.threats"}, @@ -267,9 +249,9 @@ FREE_PUBLIC_SOURCES: List[SourceConfig] = [ health_check_url="https://pulsedive.com", reliability_score=4), _base("hudsonrock_osint", "breach_data", - "https://cavalier.hudsonrock.com/api/json/v2/osint-tools/search-by-login?username={target}", "GET", + "https://cavalier.hudsonrock.com/api/json/v2/osint-tools/search-by-email?email={target}", "GET", {"stealers": "$.stealers"}, - input_type="username", output_type=["email", "domain"], + input_type="email", output_type=["email", "domain", "username"], normalization_map={"stealers": "breach_record"}, tags=["passive", "stealth"], health_check_url="https://cavalier.hudsonrock.com", reliability_score=4), @@ -296,12 +278,14 @@ FREE_PUBLIC_SOURCES: List[SourceConfig] = [ {"prefixes": "$.data.prefixes[*].prefix"}, 
input_type="ip", output_type=["ip"], tags=["passive", "infrastructure"], - health_check_url="https://api.bgpview.io", reliability_score=4), + health_check_url="https://api.bgpview.io", reliability_score=2, is_volatile=True), - _base("emailrep_io", "email_rep", + _auth("emailrep_io", "email_rep", "https://emailrep.io/{target}", "GET", {"reputation": "$.reputation"}, rate_limit=2.0, + headers={"Key": "{EMAILREP_API_KEY}"}, + api_key_slots=["{EMAILREP_API_KEY}"], input_type="email", output_type=["email"], normalization_map={"reputation": "email_reputation"}, tags=["passive", "fast"], @@ -446,11 +430,12 @@ FREE_PUBLIC_SOURCES: List[SourceConfig] = [ health_check_url="https://checkurl.phishtank.com", reliability_score=4), _base("duckduckgo_api", "search", - "https://api.duckduckgo.com/?q={target}&format=json", "GET", - {"abstract": "$.Abstract"}, + "https://searx.tiekoetter.com/search?q={target}&format=json&categories=general", "GET", + {"results": "$.results"}, input_type="any", output_type=["url"], + normalization_map={"url": "url", "title": "title"}, tags=["passive", "fast"], - health_check_url="https://api.duckduckgo.com", reliability_score=5), + health_check_url="https://searx.tiekoetter.com", reliability_score=3, is_volatile=True), _base("cve_search", "vulns", "https://cve.circl.lu/api/cve/{target}", "GET", @@ -474,21 +459,14 @@ FREE_PUBLIC_SOURCES: List[SourceConfig] = [ tags=["passive"], health_check_url="https://packetstormsecurity.com", reliability_score=4), - _base("checkleaked", "breaches", - "https://api.checkleaked.cc/check/{target}", "GET", - {"found": "$.found"}, - input_type="email", output_type=["email"], - tags=["passive", "stealth"], - health_check_url="https://api.checkleaked.cc", reliability_score=2, is_volatile=True, - backup_endpoints=["https://checkleaked.cc/api/check/{target}"]), - _base("scylla_sh_search", "breaches", - "https://scylla.sh/search?q={target}", "GET", + "https://scylla.so/search?q={target}", "GET", {"results": "$.*"}, 
input_type="email", output_type=["email", "domain"], tags=["passive", "stealth"], - health_check_url="https://scylla.sh", reliability_score=2, is_volatile=True, - backup_endpoints=["https://scylla.sh/api/search?q={target}"]), + health_check_url="https://scylla.so", reliability_score=2, is_volatile=True, + bypass_required=["cloudflare"], user_agent_type="browser", + backup_endpoints=["https://scylla.so/api/search?q={target}"]), _base("vigilante_pw", "breaches", "https://vigilante.pw/api/search?q={target}", "GET", @@ -496,6 +474,44 @@ FREE_PUBLIC_SOURCES: List[SourceConfig] = [ input_type="email", output_type=["email"], tags=["passive", "stealth"], health_check_url="https://vigilante.pw", reliability_score=2, is_volatile=True), + + # ── New free sources (v1.0.1) ───────────────────────────────────────────── + + _base("proxynova_comb", "breaches", + "https://api.proxynova.com/comb?query={target}", "GET", + {"lines": "$.lines"}, + input_type="email", output_type=["email"], + normalization_map={"lines": "credential_line"}, + tags=["passive", "stealth"], + health_check_url="https://api.proxynova.com", + reliability_score=3, is_volatile=True), + + _base("shodan_internetdb", "scanners", + "https://internetdb.shodan.io/{target}", "GET", + {"hostnames": "$.hostnames", "ports": "$.ports", "vulns": "$.vulns"}, + input_type="ip", output_type=["domain", "ip"], + normalization_map={"hostnames": "domain", "vulns": "cve"}, + tags=["passive", "fast", "infrastructure"], + health_check_url="https://internetdb.shodan.io", + reliability_score=5), + + _base("circl_hashlookup", "hashes", + "https://hashlookup.circl.lu/lookup/md5/{target}", "GET", + {"filename": "$.FileName", "known_malicious": "$.KnownMalicious"}, + input_type="hash", output_type=["hash"], + normalization_map={"FileName": "filename", "MD5": "hash_md5"}, + tags=["passive", "fast"], + health_check_url="https://hashlookup.circl.lu", + reliability_score=5), + + _base("ipapi_is", "geolocation", + 
"https://api.ipapi.is/?q={target}", "GET", + {"org": "$.org", "asn": "$.asn.asn", "abuse": "$.abuse.email"}, + input_type="ip", output_type=["domain"], + normalization_map={"org": "asn_org", "asn": "asn_number", "abuse": "abuse_contact"}, + tags=["passive", "fast"], + health_check_url="https://api.ipapi.is", + reliability_score=4), ] # --------------------------------------------------------------------------- @@ -586,24 +602,6 @@ AUTHENTICATED_PREMIUM_SOURCES: List[SourceConfig] = [ tags=["passive", "infrastructure"], health_check_url="https://fofa.info", reliability_score=4), - _auth("spyse_domain", "scanners", - "https://api.spyse.com/v1/domain/details/{target}", "GET", - {"asn": "$.data.asn"}, - headers={"Authorization": "Bearer {SPYSE_API_KEY}"}, - api_key_slots=["{SPYSE_API_KEY}"], - input_type="domain", output_type=["ip"], - tags=["passive"], - health_check_url="https://api.spyse.com", reliability_score=3), - - _auth("spyse_ip", "scanners", - "https://api.spyse.com/v1/ip/details/{target}", "GET", - {"geo": "$.data.geo"}, - headers={"Authorization": "Bearer {SPYSE_API_KEY}"}, - api_key_slots=["{SPYSE_API_KEY}"], - input_type="ip", output_type=["ip"], - tags=["passive"], - health_check_url="https://api.spyse.com", reliability_score=3), - _auth("onyphe_datascan", "scanners", "https://www.onyphe.io/api/v2/simple/datascan/{target}", "GET", {"results": "$.results"}, @@ -824,7 +822,7 @@ AUTHENTICATED_PREMIUM_SOURCES += [ health_check_url="https://haveibeenpwned.com", reliability_score=5), _auth("dehashed", "breaches", - "https://api.dehashed.com/search?query={target}", "GET", + "https://api.dehashed.com/v2/search?query={target}", "GET", {"entries": "$.entries"}, headers={"Authorization": "Basic {DEHASHED_AUTH_BASE64}", "Accept": "application/json"}, api_key_slots=["{DEHASHED_AUTH_BASE64}"], @@ -924,15 +922,6 @@ AUTHENTICATED_PREMIUM_SOURCES += [ tags=["passive"], health_check_url="https://api.tines.com", reliability_score=3), - _auth("leakstats_pw", "breaches", - 
"https://leakstats.net/api/password/{target}", "GET", - {"count": "$.count"}, - headers={"api-key": "{LEAKSTATS_API_KEY}"}, - api_key_slots=["{LEAKSTATS_API_KEY}"], - input_type="hash", output_type=["hash"], - tags=["passive"], - health_check_url="https://leakstats.net", reliability_score=3, is_volatile=True), - _base("leak_lookup", "breaches", "https://leak-lookup.com/api/search", "POST", {"results": "$.message"}, @@ -1238,14 +1227,7 @@ AUTHENTICATED_PREMIUM_SOURCES += [ health_check_url="http://apilayer.net", reliability_score=4), # ── Hashes ──────────────────────────────────────────────────────────────── - - _auth("hashes_org", "hashes", - "https://hashes.org/api.php?key={HASHES_API_KEY}&query={target}", "GET", - {"found": "$.results"}, - api_key_slots=["{HASHES_API_KEY}"], - input_type="hash", output_type=["hash"], - tags=["passive"], - health_check_url="https://hashes.org", reliability_score=3), + # hashes_org removed — service unavailable # ── Search ──────────────────────────────────────────────────────────────── @@ -1265,6 +1247,66 @@ AUTHENTICATED_PREMIUM_SOURCES += [ input_type="any", output_type=["url"], tags=["passive"], health_check_url="https://api.bing.microsoft.com", reliability_score=5), + + # ── New authenticated sources (v1.0.1) ─────────────────────────────────── + + _auth("threatfox", "threat_intel", + "https://threatfox-api.abuse.ch/api/v1/", "POST", + {"results": "$.data"}, + headers={"API-KEY": "{THREATFOX_API_KEY}", "Content-Type": "application/json"}, + payload_template={"query": "search_ioc", "search_term": "{target}"}, + api_key_slots=["{THREATFOX_API_KEY}"], + input_type="any", output_type=["ip", "domain", "hash"], + normalization_map={"ioc": "indicator", "malware": "malware_family"}, + tags=["passive", "threat"], + health_check_url="https://threatfox-api.abuse.ch", + reliability_score=5), + + _auth("urlhaus", "threat_intel", + "https://urlhaus-api.abuse.ch/v1/host/", "POST", + {"urls": "$.urls"}, + headers={"Auth-Key": 
"{URLHAUS_API_KEY}"}, + payload_template={"host": "{target}"}, + api_key_slots=["{URLHAUS_API_KEY}"], + input_type="domain", output_type=["url", "domain"], + normalization_map={"url": "malware_url", "threat": "threat_type"}, + tags=["passive", "threat"], + health_check_url="https://urlhaus-api.abuse.ch", + reliability_score=5), + + _auth("malwarebazaar", "hashes", + "https://mb-api.abuse.ch/api/v1/", "POST", + {"data": "$.data"}, + headers={"API-KEY": "{MALWAREBAZAAR_API_KEY}"}, + payload_template={"query": "get_info", "hash": "{target}"}, + api_key_slots=["{MALWAREBAZAAR_API_KEY}"], + input_type="hash", output_type=["hash"], + normalization_map={"file_name": "filename", "tags": "tags"}, + tags=["passive", "threat"], + health_check_url="https://mb-api.abuse.ch", + reliability_score=5), + + _auth("fullhunt_subdomains", "dns_recon", + "https://fullhunt.io/api/v1/domain/{target}/subdomains", "GET", + {"hosts": "$.hosts"}, + headers={"X-API-KEY": "{FULLHUNT_API_KEY}"}, + api_key_slots=["{FULLHUNT_API_KEY}"], + input_type="domain", output_type=["domain", "ip"], + normalization_map={"host": "domain"}, + tags=["passive", "infrastructure"], + health_check_url="https://fullhunt.io", + reliability_score=4), + + _auth("netlas_search", "scanners", + "https://app.netlas.io/api/responses/?q={target}&source_type=include&start=0&fields=", "GET", + {"items": "$.items"}, + headers={"X-API-Key": "{NETLAS_API_KEY}"}, + api_key_slots=["{NETLAS_API_KEY}"], + input_type="ip", output_type=["ip", "domain"], + normalization_map={"data.ip": "ip_address", "data.domain": "domain"}, + tags=["passive", "infrastructure"], + health_check_url="https://app.netlas.io", + reliability_score=4), ] diff --git a/nox.py b/nox.py index 8a91fdf..0ca56e8 100644 --- a/nox.py +++ b/nox.py @@ -1539,19 +1539,37 @@ def _build_ssl_context() -> ssl.SSLContext: _SSL_CTX = _build_ssl_context() +# SearXNG public instance pool — used by DorkingEngine and ScrapeEngine. 
+# Instances are rotated randomly; proxy rotation distributes load across IPs. +_SEARX_INSTANCES = [ + "https://searx.tiekoetter.com", + "https://search.sapti.me", + "https://searx.perennialte.ch", + "https://search.mdosch.de", + "https://paulgo.io", + "https://priv.au", +] + # ── Header randomisation helpers ────────────────────────────────────── _UA_POOL = [ - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36", - "Mozilla/5.0 (Windows NT 10.0; rv:133.0) Gecko/20100101 Firefox/133.0", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:133.0) Gecko/20100101 Firefox/133.0", - "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0", - "Mozilla/5.0 (iPhone; CPU iPhone OS 18_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1 Mobile/15E148 Safari/604.1", - "Mozilla/5.0 (Android 15; Mobile; rv:133.0) Gecko/133.0 Firefox/133.0", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1 Safari/605.1.15", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:136.0) Gecko/20100101 Firefox/136.0", + "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:136.0) 
Gecko/20100101 Firefox/136.0", + "Mozilla/5.0 (iPhone; CPU iPhone OS 18_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Mobile/15E148 Safari/604.1", + "Mozilla/5.0 (Android 15; Mobile; rv:136.0) Gecko/136.0 Firefox/136.0", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Safari/605.1.15", +] + +_CH_UA_MAP = [ + # Order matters: more specific patterns first + ("Edg/135", '"Microsoft Edge";v="135", "Not-A.Brand";v="8", "Chromium";v="135"'), + ("Chrome/135", '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"'), + ("Chrome/134", '"Google Chrome";v="134", "Not-A.Brand";v="8", "Chromium";v="134"'), ] _ACCEPT_LANG_POOL = [ @@ -1568,20 +1586,32 @@ _SEC_FETCH_SITE_POOL = ["none", "same-origin", "cross-site", "same-site"] def _random_headers(extra: Optional[Dict] = None) -> Dict[str, str]: - """Return a randomised, browser-grade header set.""" + """Return a randomised, browser-grade header set with Client Hints for Chromium UAs.""" + ua = random.choice(_UA_POOL) h = { - "User-Agent": random.choice(_UA_POOL), - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", - "Accept-Language": random.choice(_ACCEPT_LANG_POOL), - "Accept-Encoding": "gzip, deflate, br", - "DNT": "1", - "Connection": "keep-alive", + "User-Agent": ua, + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": random.choice(_ACCEPT_LANG_POOL), + "Accept-Encoding": "gzip, deflate, br", + "DNT": "1", + "Connection": "keep-alive", "Upgrade-Insecure-Requests": "1", - "Sec-Fetch-Dest": random.choice(_SEC_FETCH_DEST_POOL), - "Sec-Fetch-Mode": random.choice(_SEC_FETCH_MODE_POOL), - "Sec-Fetch-Site": random.choice(_SEC_FETCH_SITE_POOL), - "Cache-Control": "max-age=0", + "Sec-Fetch-Dest": random.choice(_SEC_FETCH_DEST_POOL), + 
"Sec-Fetch-Mode": random.choice(_SEC_FETCH_MODE_POOL), + "Sec-Fetch-Site": random.choice(_SEC_FETCH_SITE_POOL), + "Cache-Control": "max-age=0", } + # Add Sec-CH-UA Client Hints for Chromium-based UAs (Firefox omits these) + if "Firefox" not in ua: + ch_ua = next((v for k, v in _CH_UA_MAP if k in ua), None) + if ch_ua: + h["Sec-CH-UA"] = ch_ua + h["Sec-CH-UA-Mobile"] = "?0" + h["Sec-CH-UA-Platform"] = ( + '"Windows"' if "Windows" in ua else + '"macOS"' if "Mac" in ua else + '"Linux"' + ) if extra: h.update(extra) return h @@ -1675,7 +1705,7 @@ class AsyncSource(ABC): return resp.status, await resp.text(errors="replace"), body except Exception as exc: if attempt < Cfg.RETRIES - 1: - await asyncio.sleep(Cfg.RETRY_DELAY * (attempt + 1)) + await asyncio.sleep(Cfg.RETRY_DELAY * (2 ** attempt) + random.uniform(0, 1)) continue _syslog.debug("API_FAIL source=%s url=%s error=%s", self.name, url[:80], exc) return 0, "", b"" @@ -1694,7 +1724,7 @@ class AsyncSource(ABC): if resp.status == 429: retry_after = int(resp.headers.get("Retry-After", Cfg.RETRY_DELAY * (attempt + 2))) _syslog.info("RATE_LIMIT source=%s url=%s retry_after=%ds", self.name, url[:80], retry_after) - await asyncio.sleep(min(retry_after, Cfg.RETRY_DELAY * (attempt + 2))) + await asyncio.sleep(min(retry_after, 30)) continue body = await resp.read() if resp.status >= 400: @@ -1705,7 +1735,7 @@ class AsyncSource(ABC): if resp.status == 429: retry_after = int(resp.headers.get("Retry-After", Cfg.RETRY_DELAY * (attempt + 2))) _syslog.info("RATE_LIMIT source=%s url=%s retry_after=%ds", self.name, url[:80], retry_after) - await asyncio.sleep(min(retry_after, Cfg.RETRY_DELAY * (attempt + 2))) + await asyncio.sleep(min(retry_after, 30)) continue body = await resp.read() if resp.status >= 400: @@ -1713,7 +1743,7 @@ class AsyncSource(ABC): return resp.status, await resp.text(errors="replace"), body except Exception as exc: if attempt < Cfg.RETRIES - 1: - await asyncio.sleep(Cfg.RETRY_DELAY * (attempt + 1)) + await 
asyncio.sleep(Cfg.RETRY_DELAY * (2 ** attempt) + random.uniform(0, 1)) continue _syslog.debug("API_FAIL source=%s url=%s error=%s", self.name, url[:80], exc) return 0, "", b"" @@ -1887,7 +1917,7 @@ class Session: return r except Exception as e: if attempt < Cfg.RETRIES - 1: - time.sleep(Cfg.RETRY_DELAY * (attempt + 1)) + time.sleep(Cfg.RETRY_DELAY * (2 ** attempt) + random.uniform(0, 1)) continue logger.debug("GET fail %s: %s", url, e) return self._null_response(url) @@ -1919,7 +1949,7 @@ class Session: return self._make_response(raw.status, rd, dict(raw.headers), raw.url) except Exception as e: if attempt < Cfg.RETRIES - 1: - time.sleep(Cfg.RETRY_DELAY * (attempt + 1)) + time.sleep(Cfg.RETRY_DELAY * (2 ** attempt) + random.uniform(0, 1)) continue logger.debug("POST fail %s: %s", url, e) return self._null_response(url) @@ -2251,43 +2281,37 @@ class DorkingEngine(Src): return meta async def _ddg_search(self, query: str, _session=None) -> List[dict]: - """DDG search with proxy rotation and circuit-breaker (max 3 retries).""" + """DDG HTML is bot-blocked since 2025. Use SearXNG public JSON API.""" if not aiohttp_mod: return [] try: from aiohttp_socks import ProxyConnector as _ProxyConnector except ImportError: _ProxyConnector = None - url = f"https://html.duckduckgo.com/html/?q={urllib.parse.quote(query)}" - for attempt in range(3): - proxy = self._get_next_proxy() - ua = random.choice(_UA_POOL) - headers = {"User-Agent": ua} - try: - if proxy and _ProxyConnector: - connector = _ProxyConnector.from_url(proxy) - else: - connector = aiohttp_mod.TCPConnector(ssl=_SSL_CTX) - # Create session once per attempt; close it before the next retry. 
- async with aiohttp_mod.ClientSession(connector=connector) as sess: - async with sess.get(url, headers=headers, - timeout=aiohttp_mod.ClientTimeout(total=12)) as resp: - if resp.status in (403, 429): + instance = random.choice(_SEARX_INSTANCES) + url = f"{instance}/search?q={urllib.parse.quote(query)}&format=json&categories=general" + proxy = self._get_next_proxy() + try: + if proxy and _ProxyConnector: + connector = _ProxyConnector.from_url(proxy) + else: + connector = aiohttp_mod.TCPConnector(ssl=_SSL_CTX) + async with aiohttp_mod.ClientSession(connector=connector) as sess: + async with sess.get(url, headers=_random_headers(), + timeout=aiohttp_mod.ClientTimeout(total=12)) as resp: + if resp.status != 200: + if proxy: self._dead_proxies.add(proxy) - next_p = self._get_next_proxy() - logger.warning("[!] Proxy Ban detected. Rotating to %s...", next_p) - continue - text = await resp.text(errors="replace") - hits = [] - for m in re.finditer(r'class="result__url"[^>]*>([^<]+)<', text): - raw = m.group(1).strip() - if raw: - hits.append({"url": raw if raw.startswith("http") else "https://" + raw, - "title": "", "dork": query}) - return hits[:5] - except Exception: - if proxy: - self._dead_proxies.add(proxy) + return [] + data = await resp.json(content_type=None) + return [ + {"url": r.get("url", ""), "title": r.get("title", ""), "dork": query} + for r in data.get("results", [])[:5] + if r.get("url") + ] + except Exception: + if proxy: + self._dead_proxies.add(proxy) return [] async def async_search(self, session, query: str, qtype: str) -> List[Record]: @@ -2425,16 +2449,28 @@ class DorkEngine: urls = { "google": f"https://www.google.com/search?q={urllib.parse.quote(query)}&num=10", "bing": f"https://www.bing.com/search?q={urllib.parse.quote(query)}&count=10", - "ddg": f"https://html.duckduckgo.com/html/?q={urllib.parse.quote(query)}", + "ddg": f"{random.choice(_SEARX_INSTANCES)}/search?q={urllib.parse.quote(query)}&format=json&categories=general", } - resp = 
self.s.get(urls.get(engine, urls["google"]), timeout=15, use_cloudscraper=True) - if not resp.ok or not BeautifulSoup: + use_cs = engine != "ddg" # SearXNG is a plain JSON API — no cloudscraper needed + resp = self.s.get(urls.get(engine, urls["google"]), timeout=15, use_cloudscraper=use_cs) + if not resp.ok: + return hits + # DDG/SearXNG returns JSON + if engine == "ddg": + try: + data = resp.json() + for r in data.get("results", [])[:10]: + if r.get("url"): + hits.append({"title": r.get("title", ""), "url": r["url"], "snippet": r.get("content", "")}) + except Exception: + pass + return hits + if not BeautifulSoup: return hits soup = BeautifulSoup(resp.text, "html.parser") selectors = { "google": ("div.g", "h3", "a[href]", ".VwiC3b"), "bing": ("li.b_algo", "h2", "a", ".b_caption p"), - "ddg": (".result", ".result__title", ".result__url", ".result__snippet"), } container, title_sel, link_sel, snippet_sel = selectors.get(engine, selectors["google"]) for item in soup.select(container)[:10]: @@ -2458,18 +2494,8 @@ class DorkEngine: # ======================================================================= class ScrapeEngine: PASTE_SITES = [ - ("Pastebin", "https://psbdmp.ws/api/v3/search/{q}", "json"), + # Paste intelligence is routed through SearXNG dorks and IntelX. 
("IntelX", "https://2.intelx.io/intelligent/search", "intelx"), - ("Paste.ee", "https://api.paste.ee/v1/search?query={q}", "json"), - ("Rentry", "https://rentry.co/api/search?q={q}", "json"), - ("Ghostbin", "https://ghostbin.com/api/search?q={q}", "json"), - ("JustPaste", "https://justpaste.it/api/search?q={q}", "json"), - ("DPaste", "https://dpaste.org/api/search?q={q}", "json"), - ("Hastebin", "https://hastebin.com/api/search?q={q}", "json"), - ("PrivateBin", "https://privatebin.net/api/search?q={q}", "json"), - ("ControlC", "https://controlc.com/api/search?q={q}", "json"), - ("Paste2", "https://paste2.org/api/search?q={q}", "json"), - ("PastebinPro", "https://pastebin.com/api/api_search.php?q={q}", "xml"), ] CRED_RE = re.compile(r"[\w.+-]+@[\w-]+\.[\w.-]+\s*[:;|]\s*\S+", re.IGNORECASE) @@ -2594,13 +2620,15 @@ class ScrapeEngine: } for sq in _ddg_queries.get(qt, [f'"{q}" password leak', f'"{q}" database dump']): try: - resp = self.s.get(f"https://html.duckduckgo.com/html/?q={urllib.parse.quote(sq)}", timeout=10, use_cloudscraper=True) - if resp.ok and BeautifulSoup: - soup = BeautifulSoup(resp.text, "html.parser") - for r in soup.select(".result")[:5]: - title_el = r.select_one(".result__title") - if title_el: - results["pastes"].append({"site":"DDG","title":title_el.get_text().strip(),"query":sq}) + resp = self.s.get(f"{random.choice(_SEARX_INSTANCES)}/search?q={urllib.parse.quote(sq)}&format=json&categories=general", timeout=10) + if resp.ok: + try: + data = resp.json() + for r in data.get("results", [])[:5]: + if r.get("title"): + results["pastes"].append({"site": "SearXNG", "title": r["title"], "url": r.get("url", ""), "query": sq}) + except Exception: + pass except Exception: continue @@ -2670,18 +2698,19 @@ class ScrapeEngine: ] for dork in dorks: try: - resp = self.s.get(f"https://html.duckduckgo.com/html/?q={urllib.parse.quote(dork)}", timeout=10, use_cloudscraper=True) - if resp.ok and BeautifulSoup: - soup = BeautifulSoup(resp.text, "html.parser") - for 
r in soup.select(".result")[:5]: - title_el = r.select_one(".result__title") - url_el = r.select_one(".result__url") - if title_el: - hits.append({ - "dork": dork, - "title": title_el.get_text().strip(), - "url": url_el.get_text().strip() if url_el else "", - }) + resp = self.s.get(f"{random.choice(_SEARX_INSTANCES)}/search?q={urllib.parse.quote(dork)}&format=json&categories=general", timeout=10) + if resp.ok: + try: + data = resp.json() + for r in data.get("results", [])[:5]: + if r.get("title"): + hits.append({ + "dork": dork, + "title": r["title"], + "url": r.get("url", ""), + }) + except Exception: + pass time.sleep(random.uniform(2.0, 4.0)) except Exception: continue @@ -2694,29 +2723,13 @@ class ScrapeEngine: data = paste.get("data",{}) if not pid: return "" - raw_urls = { - "Pastebin": f"https://psbdmp.ws/api/v3/dump/{pid}", - "Rentry": f"https://rentry.co/api/raw/{pid}", - "Hastebin": f"https://hastebin.com/raw/{pid}", - "DPaste": f"https://dpaste.org/{pid}/raw/", - "Ghostbin": f"https://ghostbin.com/paste/{pid}/raw", - "JustPaste": f"https://justpaste.it/{pid}", - "PrivateBin": f"https://privatebin.net/?{pid}", - "ControlC": f"https://controlc.com/{pid}", - "Paste2": f"https://paste2.org/raw/{pid}", - "PastebinPro":f"https://pastebin.com/raw/{pid}", - } + raw_urls: dict = {} # paste fetch URLs — resolved per site name if site == "IntelX": key = self.db.get_key("intelx") if key: resp = self.s.get(f"https://2.intelx.io/file/read?type=1&systemid={pid}&k={key}", timeout=15) if resp.ok: return resp.text[:10000] - elif site == "Paste.ee": - resp = self.s.get(f"https://api.paste.ee/v1/pastes/{pid}", timeout=10) - if resp.ok: - sections = resp.json().get("paste",{}).get("sections",[]) - return "\n".join(s.get("contents","") for s in sections)[:10000] elif site in raw_urls: resp = self.s.get(raw_urls[site], timeout=10) if resp.ok and resp.text: @@ -2844,29 +2857,24 @@ class HashEngine: return list(set(mutations)) def _online(self, h: str) -> Optional[str]: - apis = 
[ - (f"https://www.nitrxgen.net/md5db/{h}", "text"), - (f"https://hashes.org/api.php?key=&query={h}", "json"), - (f"https://hash.help/api/lookup/{h}", "json"), - (f"https://hashkiller.io/api/search.php?hash={h}", "json"), - ] + # cmd5.org removed — paywalled, returns error for all hashes + # hashes.com requires a paid API key (HASHES_COM_API_KEY) + try: + from sources.helpers.config_handler import ConfigManager # type: ignore + key = ConfigManager.get_key("HASHES_COM_API_KEY") + if not key: + return None + apis = [(f"https://hashes.com/en/api/search?hash={h}&key={key}", "json")] + except Exception: + return None _get = self._session.get if self._session else (lambda url, **kw: Session._null_response(url)) for url, fmt in apis: try: resp = _get(url, timeout=8) if not resp.ok: continue - if fmt == "text": - text = resp.text.strip() - if not text or len(text) >= 100: - continue - tl = text.lower() - if any(tl.startswith(p) for p in ("not found", "error", "invalid", "no result", "not in", "cmd5-error", "not exist", "code erreur", "erreur", "unknown")): - continue - return text - elif fmt == "json": - data = resp.json() - if data.get("result") or data.get("plaintext"): - return data.get("result", data.get("plaintext","")) + data = resp.json() + if data.get("result") or data.get("plaintext"): + return data.get("result", data.get("plaintext", "")) except Exception: continue return None @@ -7090,15 +7098,30 @@ def _main_run(args, config: NoxConfig, db: NoxDB) -> None: if not candidate.is_dir(): candidate = Path("/usr/share/nox-cli/sources") if candidate.is_dir(): + # Copy all current package sources to runtime dir count = 0 + pkg_names = set() for jf in candidate.glob("*.json"): + pkg_names.add(jf.name) dst = SOURCE_DIR / jf.name try: _shutil.copy2(jf, dst) count += 1 except OSError: pass - out("ok", f"Reset {count} source plugins from package.") + # Remove orphaned plugins no longer in the package + removed = 0 + for existing in SOURCE_DIR.glob("*.json"): + if 
existing.name not in pkg_names: + try: + existing.unlink() + removed += 1 + except OSError: + pass + msg = f"Reset {count} source plugins from package." + if removed: + msg += f" Removed {removed} orphaned plugin(s)." + out("ok", msg) else: out("warn", "Package sources directory not found.") return diff --git a/pyproject.toml b/pyproject.toml index 1cafac4..42fbc19 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "nox-cli" -version = "1.0.0" +version = "1.0.1" description = "Advanced Asynchronous Cyber Threat Intelligence Framework" readme = { file = "README.md", content-type = "text/markdown" } license = { text = "Apache-2.0" } @@ -15,6 +15,7 @@ dependencies = [ "aiohttp-socks>=0.8.4", "aiosqlite>=0.20.0", "httpx[http2]>=0.27.0", + "brotli>=1.1.0", "requests>=2.31.0", "certifi>=2024.2.2", "cloudscraper>=1.2.71", diff --git a/requirements.txt b/requirements.txt index acbd38a..ccd6135 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,7 @@ aiohttp>=3.9.0 aiohttp-socks>=0.8.4 # SOCKS4/5 proxy support for aiohttp aiosqlite>=0.20.0 # async SQLite (forensic persistence DB) httpx[http2]>=0.27.0 # Guardian Engine: dynamic proxy fetch + HTTP/2 +brotli>=1.1.0 # brotli decompression for aiohttp br responses # ── Intelligence & Scraping ──────────────────────────────────────────── requests>=2.31.0 diff --git a/sources/bgpview_ip.json b/sources/bgpview_ip.json index 1ef9fe1..357192a 100644 --- a/sources/bgpview_ip.json +++ b/sources/bgpview_ip.json @@ -21,7 +21,8 @@ ], "health_check_url": "https://api.bgpview.io", "expected_status": 200, - "reliability_score": 4, + "reliability_score": 2, + "is_volatile": true, "backup_endpoints": [], - "confidence": 0.85 + "confidence": 0.55 } \ No newline at end of file diff --git a/sources/circl_hashlookup.json b/sources/circl_hashlookup.json new file mode 100644 index 0000000..183e192 --- /dev/null +++ b/sources/circl_hashlookup.json @@ -0,0 +1,31 @@ +{ + 
"name": "circl_hashlookup", + "category": "hashes", + "endpoint": "https://hashlookup.circl.lu/lookup/md5/{target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "filename": "$.FileName", + "known_malicious": "$.KnownMalicious" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "hash", + "output_type": [ + "hash" + ], + "normalization_map": { + "FileName": "filename", + "MD5": "hash_md5" + }, + "tags": [ + "passive", + "fast" + ], + "health_check_url": "https://hashlookup.circl.lu", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/dehashed.json b/sources/dehashed.json index 7481b68..63b3331 100644 --- a/sources/dehashed.json +++ b/sources/dehashed.json @@ -1,7 +1,7 @@ { "name": "dehashed", "category": "breaches", - "endpoint": "https://api.dehashed.com/search?query={target}", + "endpoint": "https://api.dehashed.com/v2/search?query={target}", "method": "GET", "requires_auth": true, "selectors": { diff --git a/sources/duckduckgo_api.json b/sources/duckduckgo_api.json index 319195d..ff4b8c7 100644 --- a/sources/duckduckgo_api.json +++ b/sources/duckduckgo_api.json @@ -1,11 +1,11 @@ { "name": "duckduckgo_api", "category": "search", - "endpoint": "https://api.duckduckgo.com/?q={target}&format=json", + "endpoint": "https://searx.tiekoetter.com/search?q={target}&format=json&categories=general", "method": "GET", "requires_auth": false, "selectors": { - "abstract": "$.Abstract" + "results": "$.results" }, "rate_limit": 1.0, "headers": {}, @@ -14,14 +14,18 @@ "output_type": [ "url" ], - "normalization_map": {}, + "normalization_map": { + "url": "url", + "title": "title" + }, "tags": [ "passive", "fast" ], - "health_check_url": "https://api.duckduckgo.com", + "health_check_url": "https://searx.tiekoetter.com", "expected_status": 200, - "reliability_score": 5, + "reliability_score": 3, + "is_volatile": true, "backup_endpoints": 
[], - "confidence": 1.0 + "confidence": 0.7 } \ No newline at end of file diff --git a/sources/emailrep_io.json b/sources/emailrep_io.json index 25a9856..b77dd21 100644 --- a/sources/emailrep_io.json +++ b/sources/emailrep_io.json @@ -3,13 +3,17 @@ "category": "email_rep", "endpoint": "https://emailrep.io/{target}", "method": "GET", - "requires_auth": false, + "requires_auth": true, "selectors": { "reputation": "$.reputation" }, "rate_limit": 2.0, - "headers": {}, - "api_key_slots": [], + "headers": { + "Key": "{EMAILREP_API_KEY}" + }, + "api_key_slots": [ + "{EMAILREP_API_KEY}" + ], "input_type": "email", "output_type": [ "email" diff --git a/sources/fullhunt_subdomains.json b/sources/fullhunt_subdomains.json new file mode 100644 index 0000000..c57cdc6 --- /dev/null +++ b/sources/fullhunt_subdomains.json @@ -0,0 +1,34 @@ +{ + "name": "fullhunt_subdomains", + "category": "dns_recon", + "endpoint": "https://fullhunt.io/api/v1/domain/{target}/subdomains", + "method": "GET", + "requires_auth": true, + "selectors": { + "hosts": "$.hosts" + }, + "rate_limit": 1.0, + "headers": { + "X-API-KEY": "{FULLHUNT_API_KEY}" + }, + "api_key_slots": [ + "{FULLHUNT_API_KEY}" + ], + "input_type": "domain", + "output_type": [ + "domain", + "ip" + ], + "normalization_map": { + "host": "domain" + }, + "tags": [ + "passive", + "infrastructure" + ], + "health_check_url": "https://fullhunt.io", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/hashes_org.json b/sources/hashes_org.json deleted file mode 100644 index fb4714c..0000000 --- a/sources/hashes_org.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "name": "hashes_org", - "category": "hashes", - "endpoint": "https://hashes.org/api.php?key={HASHES_API_KEY}&query={target}", - "method": "GET", - "requires_auth": true, - "selectors": { - "found": "$.results" - }, - "rate_limit": 1.0, - "headers": {}, - "api_key_slots": [ - "{HASHES_API_KEY}" - ], 
- "input_type": "hash", - "output_type": [ - "hash" - ], - "normalization_map": {}, - "tags": [ - "passive" - ], - "health_check_url": "https://hashes.org", - "expected_status": 200, - "reliability_score": 3, - "backup_endpoints": [], - "confidence": 0.7 -} \ No newline at end of file diff --git a/sources/helpers/config_handler.py b/sources/helpers/config_handler.py index 50c8f55..b7c883e 100644 --- a/sources/helpers/config_handler.py +++ b/sources/helpers/config_handler.py @@ -139,6 +139,14 @@ SERVICE_REGISTRY: Dict[str, Dict] = { "WHOXY_API_KEY": {"display": "Whoxy WHOIS", "public": False}, "ZEROBOUNCE_API_KEY": {"display": "ZeroBounce", "public": False}, "ZOOMEYE_API_KEY": {"display": "ZoomEye", "public": False}, + # ── Added in v1.0.1 ─────────────────────────────────────────────── + "EMAILREP_API_KEY": {"display": "EmailRep.io", "public": False}, + "HASHES_COM_API_KEY": {"display": "Hashes.com (crack API)", "public": False}, + "THREATFOX_API_KEY": {"display": "ThreatFox (abuse.ch)", "public": False}, + "URLHAUS_API_KEY": {"display": "URLhaus (abuse.ch)", "public": False}, + "MALWAREBAZAAR_API_KEY": {"display": "MalwareBazaar (abuse.ch)", "public": False}, + "FULLHUNT_API_KEY": {"display": "FullHunt (attack surface)", "public": False}, + "NETLAS_API_KEY": {"display": "Netlas.io (internet scanner)", "public": False}, } _PRIVATE_KEYS = {k: v for k, v in SERVICE_REGISTRY.items() if not v["public"]} diff --git a/sources/helpers/cracker.py b/sources/helpers/cracker.py index 9ac2501..f424b73 100644 --- a/sources/helpers/cracker.py +++ b/sources/helpers/cracker.py @@ -67,43 +67,47 @@ async def _query_api(session, url: str, fmt: str) -> Optional[str]: async def async_crack(session, hash_value: str, hash_type: str) -> Optional[str]: """ - Query multiple rainbow-table APIs concurrently. - Returns first plaintext found, or None. bcrypt is skipped. + Attempt to recover the plaintext for a given hash. 
- C1: create tasks upfront for cancellation, but await each via asyncio.shield - inside as_completed — no double wait_for wrapping. - C2: for 32-char hex (md5/ntlm ambiguity), also query NTLM-specific APIs. + Strategy: + 1. Local rockyou wordlist (no external calls, no rate limits). + 2. hashes.com API if HASHES_COM_API_KEY is configured. - Per-API timeout: 8s. Global budget: 20s (CRACK_TIMEOUT). - All tasks are cancelled as soon as the first result is found. + bcrypt is skipped — computationally infeasible for online cracking. """ if hash_type == "bcrypt": return None h = hash_value.strip().lower() - apis = [ - (f"https://www.nitrxgen.net/md5db/{h}", "text"), - (f"https://hashes.com/en/api/hash?hash={h}", "json"), - (f"https://hash.help/api/lookup/{h}", "json"), - (f"https://hashkiller.io/api/search.php?hash={h}", "json"), - (f"https://md5decrypt.net/Api/api.php?hash={h}&hash_type={hash_type}&email=&code=", "text"), - (f"https://www.cmd5.org/api.ashx?hash={h}", "text"), - ] - # C2: for 32-char hashes (md5/ntlm ambiguous), add NTLM-specific endpoint - if hash_type == "md5" and len(h) == 32: - apis.append((f"https://hashes.com/en/api/hash?hash={h}&type=ntlm", "json")) - # C1: create tasks so we can cancel them; shield each before passing to wait_for - # so cancellation of the shield future does not cancel the underlying task prematurely. + # 1. Local wordlist first — fast, zero external exposure + import concurrent.futures as _cf + loop = asyncio.get_running_loop() + with _cf.ThreadPoolExecutor(max_workers=1) as _ex: + local = await loop.run_in_executor(_ex, _local_crack_sync_blocking, hash_value, hash_type) + if local: + return local + + # 2. 
hashes.com if API key is configured + apis = [] + try: + from sources.helpers.config_handler import ConfigManager # type: ignore + hashes_com_key = ConfigManager.get_key("HASHES_COM_API_KEY") + if hashes_com_key: + apis.append((f"https://hashes.com/en/api/search?hash={h}&key={hashes_com_key}", "json")) + except Exception: + pass + + if not apis: + return None + tasks = [asyncio.create_task(_query_api(session, url, fmt)) for url, fmt in apis] result: Optional[str] = None try: for fut in asyncio.as_completed(tasks): try: res = await asyncio.wait_for(asyncio.shield(fut), timeout=_API_TIMEOUT) - except (asyncio.TimeoutError, asyncio.CancelledError): - continue - except Exception: + except (asyncio.TimeoutError, asyncio.CancelledError, Exception): continue if res: result = res @@ -111,9 +115,35 @@ async def async_crack(session, hash_value: str, hash_type: str) -> Optional[str] except Exception: pass finally: - # Cancel all remaining tasks and await to suppress pending-task warnings for t in tasks: if not t.done(): t.cancel() await asyncio.gather(*[t for t in tasks if not t.done()], return_exceptions=True) return result + + +def _local_crack_sync_blocking(hash_value: str, hash_type: str) -> Optional[str]: + """Pure-sync version for ThreadPoolExecutor.""" + import hashlib as _hl + from pathlib import Path as _Path + wordlist = _Path.home() / ".nox" / "wordlists" / "rockyou.txt" + if not wordlist.exists(): + return None + h = hash_value.strip().lower() + _hashers = { + "md5": lambda w: _hl.md5(w).hexdigest(), + "sha1": lambda w: _hl.sha1(w).hexdigest(), + "sha256": lambda w: _hl.sha256(w).hexdigest(), + } + hasher = _hashers.get(hash_type) + if not hasher: + return None + try: + with wordlist.open("rb") as f: + for line in f: + word = line.rstrip(b"\n\r") + if hasher(word) == h: + return word.decode("utf-8", errors="replace") + except Exception: + pass + return None diff --git a/sources/hudsonrock_osint.json b/sources/hudsonrock_osint.json index 3595d7f..0bcdfda 100644 --- 
a/sources/hudsonrock_osint.json +++ b/sources/hudsonrock_osint.json @@ -1,7 +1,7 @@ { "name": "hudsonrock_osint", "category": "breach_data", - "endpoint": "https://cavalier.hudsonrock.com/api/json/v2/osint-tools/search-by-login?username={target}", + "endpoint": "https://cavalier.hudsonrock.com/api/json/v2/osint-tools/search-by-email?email={target}", "method": "GET", "requires_auth": false, "selectors": { @@ -10,10 +10,11 @@ "rate_limit": 1.0, "headers": {}, "api_key_slots": [], - "input_type": "username", + "input_type": "email", "output_type": [ "email", - "domain" + "domain", + "username" ], "normalization_map": { "stealers": "breach_record" diff --git a/sources/ipapi_is.json b/sources/ipapi_is.json new file mode 100644 index 0000000..bd43176 --- /dev/null +++ b/sources/ipapi_is.json @@ -0,0 +1,33 @@ +{ + "name": "ipapi_is", + "category": "geolocation", + "endpoint": "https://api.ipapi.is/?q={target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "org": "$.org", + "asn": "$.asn.asn", + "abuse": "$.abuse.email" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "ip", + "output_type": [ + "domain" + ], + "normalization_map": { + "org": "asn_org", + "asn": "asn_number", + "abuse": "abuse_contact" + }, + "tags": [ + "passive", + "fast" + ], + "health_check_url": "https://api.ipapi.is", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/leakstats_pw.json b/sources/leakstats_pw.json deleted file mode 100644 index d73436c..0000000 --- a/sources/leakstats_pw.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "name": "leakstats_pw", - "category": "breaches", - "endpoint": "https://leakstats.net/api/password/{target}", - "method": "GET", - "requires_auth": true, - "selectors": { - "count": "$.count" - }, - "rate_limit": 1.0, - "headers": { - "api-key": "{LEAKSTATS_API_KEY}" - }, - "api_key_slots": [ - "{LEAKSTATS_API_KEY}" - ], - 
"input_type": "hash", - "output_type": [ - "hash" - ], - "normalization_map": {}, - "tags": [ - "passive" - ], - "health_check_url": "https://leakstats.net", - "expected_status": 200, - "reliability_score": 3, - "is_volatile": true, - "backup_endpoints": [], - "confidence": 0.7 -} \ No newline at end of file diff --git a/sources/malwarebazaar.json b/sources/malwarebazaar.json new file mode 100644 index 0000000..5ffb968 --- /dev/null +++ b/sources/malwarebazaar.json @@ -0,0 +1,38 @@ +{ + "name": "malwarebazaar", + "category": "hashes", + "endpoint": "https://mb-api.abuse.ch/api/v1/", + "method": "POST", + "requires_auth": true, + "selectors": { + "data": "$.data" + }, + "rate_limit": 1.0, + "headers": { + "API-KEY": "{MALWAREBAZAAR_API_KEY}" + }, + "payload_template": { + "query": "get_info", + "hash": "{target}" + }, + "api_key_slots": [ + "{MALWAREBAZAAR_API_KEY}" + ], + "input_type": "hash", + "output_type": [ + "hash" + ], + "normalization_map": { + "file_name": "filename", + "tags": "tags" + }, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://mb-api.abuse.ch", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/netlas_search.json b/sources/netlas_search.json new file mode 100644 index 0000000..c0dd59a --- /dev/null +++ b/sources/netlas_search.json @@ -0,0 +1,35 @@ +{ + "name": "netlas_search", + "category": "scanners", + "endpoint": "https://app.netlas.io/api/responses/?q={target}&source_type=include&start=0&fields=", + "method": "GET", + "requires_auth": true, + "selectors": { + "items": "$.items" + }, + "rate_limit": 1.0, + "headers": { + "X-API-Key": "{NETLAS_API_KEY}" + }, + "api_key_slots": [ + "{NETLAS_API_KEY}" + ], + "input_type": "ip", + "output_type": [ + "ip", + "domain" + ], + "normalization_map": { + "data.ip": "ip_address", + "data.domain": "domain" + }, + "tags": [ + "passive", + "infrastructure" + ], + "health_check_url": 
"https://app.netlas.io", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/checkleaked.json b/sources/proxynova_comb.json similarity index 51% rename from sources/checkleaked.json rename to sources/proxynova_comb.json index 5e8d786..d9502db 100644 --- a/sources/checkleaked.json +++ b/sources/proxynova_comb.json @@ -1,11 +1,11 @@ { - "name": "checkleaked", + "name": "proxynova_comb", "category": "breaches", - "endpoint": "https://api.checkleaked.cc/check/{target}", + "endpoint": "https://api.proxynova.com/comb?query={target}", "method": "GET", "requires_auth": false, "selectors": { - "found": "$.found" + "lines": "$.lines" }, "rate_limit": 1.0, "headers": {}, @@ -14,17 +14,17 @@ "output_type": [ "email" ], - "normalization_map": {}, + "normalization_map": { + "lines": "credential_line" + }, "tags": [ "passive", "stealth" ], - "health_check_url": "https://api.checkleaked.cc", + "health_check_url": "https://api.proxynova.com", "expected_status": 200, - "reliability_score": 2, + "reliability_score": 3, "is_volatile": true, - "backup_endpoints": [ - "https://checkleaked.cc/api/check/{target}" - ], - "confidence": 0.55 + "backup_endpoints": [], + "confidence": 0.7 } \ No newline at end of file diff --git a/sources/scylla_sh_search.json b/sources/scylla_sh_search.json index eee1826..f44ed6c 100644 --- a/sources/scylla_sh_search.json +++ b/sources/scylla_sh_search.json @@ -1,7 +1,7 @@ { "name": "scylla_sh_search", "category": "breaches", - "endpoint": "https://scylla.sh/search?q={target}", + "endpoint": "https://scylla.so/search?q={target}", "method": "GET", "requires_auth": false, "selectors": { @@ -20,12 +20,16 @@ "passive", "stealth" ], - "health_check_url": "https://scylla.sh", + "health_check_url": "https://scylla.so", "expected_status": 200, "reliability_score": 2, "is_volatile": true, + "bypass_required": [ + "cloudflare" + ], + "user_agent_type": "browser", 
"backup_endpoints": [ - "https://scylla.sh/api/search?q={target}" + "https://scylla.so/api/search?q={target}" ], "confidence": 0.55 } \ No newline at end of file diff --git a/sources/shodan_internetdb.json b/sources/shodan_internetdb.json new file mode 100644 index 0000000..b0b78af --- /dev/null +++ b/sources/shodan_internetdb.json @@ -0,0 +1,34 @@ +{ + "name": "shodan_internetdb", + "category": "scanners", + "endpoint": "https://internetdb.shodan.io/{target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "hostnames": "$.hostnames", + "ports": "$.ports", + "vulns": "$.vulns" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "ip", + "output_type": [ + "domain", + "ip" + ], + "normalization_map": { + "hostnames": "domain", + "vulns": "cve" + }, + "tags": [ + "passive", + "fast", + "infrastructure" + ], + "health_check_url": "https://internetdb.shodan.io", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/spyse_domain.json b/sources/spyse_domain.json deleted file mode 100644 index 773e40f..0000000 --- a/sources/spyse_domain.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "name": "spyse_domain", - "category": "scanners", - "endpoint": "https://api.spyse.com/v1/domain/details/{target}", - "method": "GET", - "requires_auth": true, - "selectors": { - "asn": "$.data.asn" - }, - "rate_limit": 1.0, - "headers": { - "Authorization": "Bearer {SPYSE_API_KEY}" - }, - "api_key_slots": [ - "{SPYSE_API_KEY}" - ], - "input_type": "domain", - "output_type": [ - "ip" - ], - "normalization_map": {}, - "tags": [ - "passive" - ], - "health_check_url": "https://api.spyse.com", - "expected_status": 200, - "reliability_score": 3, - "backup_endpoints": [], - "confidence": 0.7 -} \ No newline at end of file diff --git a/sources/spyse_ip.json b/sources/spyse_ip.json deleted file mode 100644 index 1925cd2..0000000 --- a/sources/spyse_ip.json +++ 
/dev/null @@ -1,30 +0,0 @@ -{ - "name": "spyse_ip", - "category": "scanners", - "endpoint": "https://api.spyse.com/v1/ip/details/{target}", - "method": "GET", - "requires_auth": true, - "selectors": { - "geo": "$.data.geo" - }, - "rate_limit": 1.0, - "headers": { - "Authorization": "Bearer {SPYSE_API_KEY}" - }, - "api_key_slots": [ - "{SPYSE_API_KEY}" - ], - "input_type": "ip", - "output_type": [ - "ip" - ], - "normalization_map": {}, - "tags": [ - "passive" - ], - "health_check_url": "https://api.spyse.com", - "expected_status": 200, - "reliability_score": 3, - "backup_endpoints": [], - "confidence": 0.7 -} \ No newline at end of file diff --git a/sources/threatcrowd_domain.json b/sources/threatcrowd_domain.json deleted file mode 100644 index 8f257a2..0000000 --- a/sources/threatcrowd_domain.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "name": "threatcrowd_domain", - "category": "threat_intel", - "endpoint": "https://www.threatcrowd.org/searchApi/v2/domain/report/?domain={target}", - "method": "GET", - "requires_auth": false, - "selectors": { - "ips": "$.resolutions[*].ip_address" - }, - "rate_limit": 5.0, - "headers": {}, - "api_key_slots": [], - "input_type": "domain", - "output_type": [ - "ip" - ], - "normalization_map": {}, - "tags": [ - "passive", - "threat" - ], - "health_check_url": "https://www.threatcrowd.org", - "expected_status": 200, - "reliability_score": 3, - "is_volatile": true, - "bypass_required": [ - "cloudflare" - ], - "user_agent_type": "browser", - "backup_endpoints": [], - "confidence": 0.7 -} \ No newline at end of file diff --git a/sources/threatcrowd_email.json b/sources/threatcrowd_email.json deleted file mode 100644 index fcc7dfc..0000000 --- a/sources/threatcrowd_email.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "name": "threatcrowd_email", - "category": "threat_intel", - "endpoint": "https://www.threatcrowd.org/searchApi/v2/email/report/?email={target}", - "method": "GET", - "requires_auth": false, - "selectors": { - "domains": "$.domains" - 
}, - "rate_limit": 5.0, - "headers": {}, - "api_key_slots": [], - "input_type": "email", - "output_type": [ - "domain" - ], - "normalization_map": {}, - "tags": [ - "passive", - "threat" - ], - "health_check_url": "https://www.threatcrowd.org", - "expected_status": 200, - "reliability_score": 3, - "is_volatile": true, - "bypass_required": [ - "cloudflare" - ], - "user_agent_type": "browser", - "backup_endpoints": [], - "confidence": 0.7 -} \ No newline at end of file diff --git a/sources/threatfox.json b/sources/threatfox.json new file mode 100644 index 0000000..f5f922a --- /dev/null +++ b/sources/threatfox.json @@ -0,0 +1,41 @@ +{ + "name": "threatfox", + "category": "threat_intel", + "endpoint": "https://threatfox-api.abuse.ch/api/v1/", + "method": "POST", + "requires_auth": true, + "selectors": { + "results": "$.data" + }, + "rate_limit": 1.0, + "headers": { + "API-KEY": "{THREATFOX_API_KEY}", + "Content-Type": "application/json" + }, + "payload_template": { + "query": "search_ioc", + "search_term": "{target}" + }, + "api_key_slots": [ + "{THREATFOX_API_KEY}" + ], + "input_type": "any", + "output_type": [ + "ip", + "domain", + "hash" + ], + "normalization_map": { + "ioc": "indicator", + "malware": "malware_family" + }, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://threatfox-api.abuse.ch", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/urlhaus.json b/sources/urlhaus.json new file mode 100644 index 0000000..872544e --- /dev/null +++ b/sources/urlhaus.json @@ -0,0 +1,38 @@ +{ + "name": "urlhaus", + "category": "threat_intel", + "endpoint": "https://urlhaus-api.abuse.ch/v1/host/", + "method": "POST", + "requires_auth": true, + "selectors": { + "urls": "$.urls" + }, + "rate_limit": 1.0, + "headers": { + "Auth-Key": "{URLHAUS_API_KEY}" + }, + "payload_template": { + "host": "{target}" + }, + "api_key_slots": [ + "{URLHAUS_API_KEY}" + ], + 
"input_type": "domain", + "output_type": [ + "url", + "domain" + ], + "normalization_map": { + "url": "malware_url", + "threat": "threat_type" + }, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://urlhaus-api.abuse.ch", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file