From 913e764133f04c4ac8fdbe00a16e3ce9d8e72d01 Mon Sep 17 00:00:00 2001 From: nox-project Date: Tue, 7 Apr 2026 10:17:43 +0200 Subject: [PATCH] NOX Framework v1.0.0 --- .github/workflows/ci.yml | 28 + .github/workflows/release.yml | 40 + .gitignore | 44 + CHANGELOG.md | 24 + CONTRIBUTING.md | 35 + LEGAL.md | 38 + LICENSE.txt | 207 + README.md | 502 ++ SECURITY.md | 34 + build_deb.sh | 41 + build_sources.py | 1341 +++++ debian/changelog | 5 + debian/control | 35 + debian/copyright | 24 + debian/install | 1 + debian/postrm | 11 + debian/prerm | 3 + debian/rules | 3 + debian/tests/control | 3 + debian/tests/smoke | 5 + docs/nox-cli.1 | 109 + nox-wrapper.sh | 14 + nox.py | 7311 ++++++++++++++++++++++++++ postinst.sh | 51 + pyproject.toml | 40 + requirements.txt | 34 + setup.py | 30 + sources/abstract_email.json | 29 + sources/abuseipdb.json | 33 + sources/alienvault_otx_domain.json | 29 + sources/alienvault_otx_ip.json | 28 + sources/alienvault_otx_malware.json | 27 + sources/alienvault_otx_user.json | 26 + sources/anubis_subdomains.json | 27 + sources/anyrun.json | 32 + sources/bgpview_ip.json | 27 + sources/binaryedge_dns.json | 30 + sources/binaryedge_exposed.json | 33 + sources/bing_search_api.json | 30 + sources/breachaware.json | 32 + sources/breachdirectory.json | 29 + sources/censys_hosts.json | 33 + sources/checkleaked.json | 30 + sources/circl_lu_pdns.json | 30 + sources/cit0day.json | 32 + sources/clearbit_enrich.json | 33 + sources/criminalip_asset.json | 33 + sources/crt_sh.json | 31 + sources/cve_search.json | 28 + sources/cxsecurity.json | 27 + sources/dehashed.json | 41 + sources/dnsdb_pdns.json | 30 + sources/domaintools_whois.json | 31 + sources/duckduckgo_api.json | 27 + sources/emailhippo.json | 29 + sources/emailrep_io.json | 29 + sources/extreme_ip_lookup.json | 28 + sources/fofa_info.json | 31 + sources/fraudlabspro.json | 31 + sources/fullcontact.json | 34 + sources/github_code_search.json | 30 + sources/github_search_repos.json | 30 + 
sources/github_users.json | 31 + sources/gitlab_search.json | 26 + sources/google_safebrowsing.json | 52 + sources/google_search_custom.json | 29 + sources/gravatar.json | 26 + sources/greynoise_community.json | 35 + sources/hackernews_user.json | 27 + sources/hackertarget_dnslookup.json | 27 + sources/hackertarget_hostsearch.json | 28 + sources/hackertarget_reverseip.json | 26 + sources/hackertarget_whois.json | 27 + sources/hashes_org.json | 28 + sources/helpers/__init__.py | 0 sources/helpers/config_handler.py | 243 + sources/helpers/cracker.py | 119 + sources/helpers/reporting.py | 658 +++ sources/helpers/scanner.py | 525 ++ sources/hibp_breached.json | 35 + sources/hudsonrock_osint.json | 30 + sources/hunter_io.json | 30 + sources/hunter_verify.json | 29 + sources/hybrid_analysis.json | 37 + sources/intelx_phone.json | 30 + sources/intelx_search.json | 44 + sources/intezer.json | 31 + sources/ipapi_co.json | 33 + sources/ipdata_co.json | 31 + sources/ipgeolocation_io.json | 31 + sources/ipinfo_io.json | 31 + sources/ipinfodb.json | 30 + sources/ipqualityscore_email.json | 31 + sources/ipstack.json | 31 + sources/ipvigilante.json | 27 + sources/joesandbox.json | 32 + sources/keybase_lookup.json | 26 + sources/keybase_proofs.json | 26 + sources/leak_lookup.json | 32 + sources/leakcheck.json | 33 + sources/leakix_search.json | 31 + sources/leakstats_pw.json | 31 + sources/mailboxlayer.json | 28 + sources/malshare.json | 29 + sources/maltiverse_ip.json | 27 + sources/metadefender_ip.json | 31 + sources/misp_search.json | 38 + sources/npm_user.json | 26 + sources/numverify.json | 32 + sources/onyphe_datascan.json | 32 + sources/packetstorm.json | 26 + sources/passivetotal_enrich.json | 30 + sources/passivetotal_whois.json | 33 + sources/phishtank_check.json | 31 + sources/pipl_search.json | 30 + sources/pulsedive.json | 29 + sources/pulsedive_analyze.json | 34 + sources/pypi_user.json | 26 + sources/recordedfuture_ip.json | 33 + sources/reddit_user.json | 28 + 
sources/robtex_ip.json | 27 + sources/scamwatcher.json | 30 + sources/scylla_sh_search.json | 31 + sources/securitytrails_history.json | 32 + sources/securitytrails_sub.json | 30 + sources/shodan_dns.json | 29 + sources/shodan_exploits.json | 28 + sources/shodan_host.json | 34 + sources/shodan_search.json | 31 + sources/snusbase.json | 46 + sources/spycloud_breach.json | 38 + sources/spyonweb.json | 28 + sources/spyse_domain.json | 30 + sources/spyse_ip.json | 30 + sources/sublist3r_api.json | 27 + sources/threatconnect_search.json | 32 + sources/threatcrowd_domain.json | 32 + sources/threatcrowd_email.json | 32 + sources/threatminer_domain.json | 28 + sources/threatminer_ip.json | 28 + sources/threatportal.json | 33 + sources/tines_breach.json | 30 + sources/twitter_v2.json | 30 + sources/urlscan_search.json | 28 + sources/urlvoid.json | 29 + sources/viewdns_reverse_ip.json | 28 + sources/vigilante_pw.json | 28 + sources/virustotal_domain.json | 33 + sources/virustotal_ip.json | 33 + sources/vulners_search.json | 30 + sources/wayback_machine.json | 26 + sources/whois_freaks.json | 28 + sources/whoisxml_api.json | 31 + sources/whoxy_whois.json | 29 + sources/zerobounce.json | 31 + sources/zoomeye_host.json | 31 + tests/__init__.py | 1 + tests/test_cracker.py | 26 + tests/test_detect.py | 28 + tests/test_identity.py | 45 + tests/test_reporting.py | 39 + tests/test_risk.py | 38 + tests/test_scanner.py | 90 + 163 files changed, 15613 insertions(+) create mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/release.yml create mode 100644 .gitignore create mode 100644 CHANGELOG.md create mode 100644 CONTRIBUTING.md create mode 100644 LEGAL.md create mode 100644 LICENSE.txt create mode 100644 README.md create mode 100644 SECURITY.md create mode 100755 build_deb.sh create mode 100644 build_sources.py create mode 100644 debian/changelog create mode 100644 debian/control create mode 100644 debian/copyright create mode 100644 debian/install create mode 
100755 debian/postrm create mode 100755 debian/prerm create mode 100755 debian/rules create mode 100644 debian/tests/control create mode 100755 debian/tests/smoke create mode 100644 docs/nox-cli.1 create mode 100755 nox-wrapper.sh create mode 100644 nox.py create mode 100755 postinst.sh create mode 100644 pyproject.toml create mode 100644 requirements.txt create mode 100644 setup.py create mode 100644 sources/abstract_email.json create mode 100644 sources/abuseipdb.json create mode 100644 sources/alienvault_otx_domain.json create mode 100644 sources/alienvault_otx_ip.json create mode 100644 sources/alienvault_otx_malware.json create mode 100644 sources/alienvault_otx_user.json create mode 100644 sources/anubis_subdomains.json create mode 100644 sources/anyrun.json create mode 100644 sources/bgpview_ip.json create mode 100644 sources/binaryedge_dns.json create mode 100644 sources/binaryedge_exposed.json create mode 100644 sources/bing_search_api.json create mode 100644 sources/breachaware.json create mode 100644 sources/breachdirectory.json create mode 100644 sources/censys_hosts.json create mode 100644 sources/checkleaked.json create mode 100644 sources/circl_lu_pdns.json create mode 100644 sources/cit0day.json create mode 100644 sources/clearbit_enrich.json create mode 100644 sources/criminalip_asset.json create mode 100644 sources/crt_sh.json create mode 100644 sources/cve_search.json create mode 100644 sources/cxsecurity.json create mode 100644 sources/dehashed.json create mode 100644 sources/dnsdb_pdns.json create mode 100644 sources/domaintools_whois.json create mode 100644 sources/duckduckgo_api.json create mode 100644 sources/emailhippo.json create mode 100644 sources/emailrep_io.json create mode 100644 sources/extreme_ip_lookup.json create mode 100644 sources/fofa_info.json create mode 100644 sources/fraudlabspro.json create mode 100644 sources/fullcontact.json create mode 100644 sources/github_code_search.json create mode 100644 
sources/github_search_repos.json create mode 100644 sources/github_users.json create mode 100644 sources/gitlab_search.json create mode 100644 sources/google_safebrowsing.json create mode 100644 sources/google_search_custom.json create mode 100644 sources/gravatar.json create mode 100644 sources/greynoise_community.json create mode 100644 sources/hackernews_user.json create mode 100644 sources/hackertarget_dnslookup.json create mode 100644 sources/hackertarget_hostsearch.json create mode 100644 sources/hackertarget_reverseip.json create mode 100644 sources/hackertarget_whois.json create mode 100644 sources/hashes_org.json create mode 100644 sources/helpers/__init__.py create mode 100644 sources/helpers/config_handler.py create mode 100644 sources/helpers/cracker.py create mode 100644 sources/helpers/reporting.py create mode 100644 sources/helpers/scanner.py create mode 100644 sources/hibp_breached.json create mode 100644 sources/hudsonrock_osint.json create mode 100644 sources/hunter_io.json create mode 100644 sources/hunter_verify.json create mode 100644 sources/hybrid_analysis.json create mode 100644 sources/intelx_phone.json create mode 100644 sources/intelx_search.json create mode 100644 sources/intezer.json create mode 100644 sources/ipapi_co.json create mode 100644 sources/ipdata_co.json create mode 100644 sources/ipgeolocation_io.json create mode 100644 sources/ipinfo_io.json create mode 100644 sources/ipinfodb.json create mode 100644 sources/ipqualityscore_email.json create mode 100644 sources/ipstack.json create mode 100644 sources/ipvigilante.json create mode 100644 sources/joesandbox.json create mode 100644 sources/keybase_lookup.json create mode 100644 sources/keybase_proofs.json create mode 100644 sources/leak_lookup.json create mode 100644 sources/leakcheck.json create mode 100644 sources/leakix_search.json create mode 100644 sources/leakstats_pw.json create mode 100644 sources/mailboxlayer.json create mode 100644 sources/malshare.json create mode 
100644 sources/maltiverse_ip.json create mode 100644 sources/metadefender_ip.json create mode 100644 sources/misp_search.json create mode 100644 sources/npm_user.json create mode 100644 sources/numverify.json create mode 100644 sources/onyphe_datascan.json create mode 100644 sources/packetstorm.json create mode 100644 sources/passivetotal_enrich.json create mode 100644 sources/passivetotal_whois.json create mode 100644 sources/phishtank_check.json create mode 100644 sources/pipl_search.json create mode 100644 sources/pulsedive.json create mode 100644 sources/pulsedive_analyze.json create mode 100644 sources/pypi_user.json create mode 100644 sources/recordedfuture_ip.json create mode 100644 sources/reddit_user.json create mode 100644 sources/robtex_ip.json create mode 100644 sources/scamwatcher.json create mode 100644 sources/scylla_sh_search.json create mode 100644 sources/securitytrails_history.json create mode 100644 sources/securitytrails_sub.json create mode 100644 sources/shodan_dns.json create mode 100644 sources/shodan_exploits.json create mode 100644 sources/shodan_host.json create mode 100644 sources/shodan_search.json create mode 100644 sources/snusbase.json create mode 100644 sources/spycloud_breach.json create mode 100644 sources/spyonweb.json create mode 100644 sources/spyse_domain.json create mode 100644 sources/spyse_ip.json create mode 100644 sources/sublist3r_api.json create mode 100644 sources/threatconnect_search.json create mode 100644 sources/threatcrowd_domain.json create mode 100644 sources/threatcrowd_email.json create mode 100644 sources/threatminer_domain.json create mode 100644 sources/threatminer_ip.json create mode 100644 sources/threatportal.json create mode 100644 sources/tines_breach.json create mode 100644 sources/twitter_v2.json create mode 100644 sources/urlscan_search.json create mode 100644 sources/urlvoid.json create mode 100644 sources/viewdns_reverse_ip.json create mode 100644 sources/vigilante_pw.json create mode 100644 
sources/virustotal_domain.json create mode 100644 sources/virustotal_ip.json create mode 100644 sources/vulners_search.json create mode 100644 sources/wayback_machine.json create mode 100644 sources/whois_freaks.json create mode 100644 sources/whoisxml_api.json create mode 100644 sources/whoxy_whois.json create mode 100644 sources/zerobounce.json create mode 100644 sources/zoomeye_host.json create mode 100644 tests/__init__.py create mode 100644 tests/test_cracker.py create mode 100644 tests/test_detect.py create mode 100644 tests/test_identity.py create mode 100644 tests/test_reporting.py create mode 100644 tests/test_risk.py create mode 100644 tests/test_scanner.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..0a64acc --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,28 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.11", "3.12"] + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: pip install pytest pytest-asyncio aiohttp pydantic colorama rich fpdf2 dnspython phonenumbers stem cloudscraper + + - name: Run tests + run: python -m pytest tests/ -v diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..fae7172 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,40 @@ +name: Release + +on: + push: + tags: + - "v*" + +jobs: + release: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install dependencies + run: pip install pytest pytest-asyncio aiohttp pydantic colorama rich fpdf2 dnspython phonenumbers stem cloudscraper + + - name: Run tests 
+ run: python -m pytest tests/ -v + + - name: Install fpm + run: | + sudo apt-get install -y ruby ruby-dev build-essential + sudo gem install fpm + + - name: Build .deb + run: bash build_deb.sh + + - name: Create GitHub Release + uses: softprops/action-gh-release@v2 + with: + files: dist/*.deb + generate_release_notes: true + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d140886 --- /dev/null +++ b/.gitignore @@ -0,0 +1,44 @@ +# Python +__pycache__/ +*.pyc +*.pyo +*.pyd +.pytest_cache/ +*.egg-info/ +dist/ +build/ +MANIFEST + +# Virtual environments +.venv/ +venv/ +env/ +ENV/ + +# NOX runtime artifacts — never commit +nox_cache.db +*.log +proxies.txt +*.deb +reports/ + +# Credentials — never commit +apikeys.json +credentials.ini +*.key +*.pem + +# OS +.DS_Store +Thumbs.db +.idea/ +.vscode/ +*.swp +*.swo + +# Env files +.env +.env.* + +# Internal development notes — not for distribution +IMPROVEMENTS.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..bec60be --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,24 @@ +# Changelog + +All notable changes to NOX are documented here. 
+ +## [1.0.0] — 2026-04-02 + +### Initial Release + +- 124 Pydantic v2-validated JSON source plugins across breach, network, OSINT, and threat-intel categories +- Fully async execution engine (`asyncio` + `aiohttp`) with JA3 TLS fingerprinting and per-request jitter +- `--autoscan` pipeline: breach scan → recursive identity pivot (depth 2) → Google/Bing/DDG dorking → paste/Telegram scraping +- `--fullscan`: breach scan + pivot only +- `--scan` / REPL `scan`: breach sources only +- Guardian Proxy Engine: automatic proxy rotation with fail-safe kill-switch +- Risk scoring engine (0–100) with time-decay, source confidence weighting, persistence multipliers, and HVT detection +- Recursive Avalanche Engine: every discovered asset re-injected as a new scan seed across breach, dork, and scrape concurrently +- Union-Find identity clustering across all breach records +- Forensic PDF/HTML/JSON/CSV/Markdown reporting with Executive Summary dashboard +- Hash identification and multi-engine cracking (dictionary + mutations + online rainbow tables) +- Deep password strength analysis with entropy, leet-speak detection, and crack-time estimates +- Interactive REPL with full feature parity with the CLI +- Full audit logging: all scan events mirrored to `~/.nox/logs/nox.log` +- Isolated `.deb` packaging for Kali Linux (PEP 668 compliant — zero system pollution) +- `~/.config/nox-cli/apikeys.json` credential store (chmod 0600) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..aaea786 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,35 @@ +# Contributing to NOX + +## Before You Start + +NOX is a security tool. All contributions must comply with the [Legal Disclaimer](README.md#legal-disclaimer) and the [Apache 2.0 License](LICENSE.txt). + +## Adding an Intelligence Source + +All sources are defined exclusively in `build_sources.py`. Never edit `sources/*.json` directly — they are auto-generated artifacts. + +1.
Add a `_base()` (public) or `_auth()` (API key required) call in `build_sources.py` +2. Run `python build_sources.py` to regenerate and validate all plugins +3. Verify with `nox-cli --sources` + +## Code Style + +- Python 3.8+ compatible +- No new runtime dependencies without justification in the PR +- All async I/O through `aiohttp` — no `requests` in hot paths +- Error handling: log at `DEBUG`, never crash the scan loop + +## Pull Request Checklist + +- [ ] `python3 -m py_compile nox.py` passes +- [ ] `python build_sources.py` completes without errors +- [ ] No credentials, API keys, or personal data in the diff +- [ ] `sources/*.json` regenerated if `build_sources.py` was modified + +## Reporting Bugs + +Open a GitHub issue with: +- NOX version (`nox-cli --version`) +- Python version +- Minimal reproduction steps +- Expected vs actual behaviour diff --git a/LEGAL.md b/LEGAL.md new file mode 100644 index 0000000..70ef0a1 --- /dev/null +++ b/LEGAL.md @@ -0,0 +1,38 @@ +# Legal Disclaimer + +**NOX (the “Tool”) is provided for educational and authorised security research purposes only.** + +By using this Tool, you agree to the following terms: + +1. **Authorisation** + You must have explicit permission from the owner of any target system or data before scanning it. Unauthorised access to computer systems, networks, or data is illegal in most jurisdictions. + +2. **No Warranty** + The Tool is provided “AS IS”, without any warranty of any kind, express or implied. The authors and contributors assume no liability for any damage or legal consequences arising from the use of this Tool. + +3. **Compliance with Laws** + You are solely responsible for ensuring that your use of this Tool complies with all applicable local, national, and international laws and regulations. + +4. **Third‑Party Services** + The Tool interacts with public websites and APIs. You must respect the terms of service of those services. 
The authors are not responsible for any violation of those terms by users of the Tool. + +5. **Ethical Use** + You shall use the Tool only for legitimate security testing, personal data protection, or academic research. Any use that violates privacy rights, intellectual property rights, or otherwise causes harm is strictly prohibited. + +6. **Indemnification** + You agree to indemnify and hold harmless the authors and contributors from any claims, damages, or expenses arising from your misuse of the Tool. + +7. **Export Control** + This Tool may be subject to export control laws. You agree to comply with all applicable export and import restrictions. + +8. **No Guarantee of Accuracy** + The Tool aggregates data from third‑party sources and may contain inaccuracies. The authors do not guarantee the correctness, completeness, or timeliness of any information obtained through the Tool. + +9. **Termination** + The authors reserve the right to revoke permission to use this Tool at any time for any reason. + +**By downloading, installing, or using NOX, you acknowledge that you have read, understood, and agree to be bound by this disclaimer.** + +--- + +If you do not agree with these terms, do not use the Tool. diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..90c465c --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,207 @@ + +--- + +### `LICENSE` + +```txt + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2025 NOX Project + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..a766258 --- /dev/null +++ b/README.md @@ -0,0 +1,502 @@ +
+ +``` + ███╗ ██╗ ██████╗ ██╗ ██╗ + ████╗ ██║██╔═══██╗╚██╗██╔╝ + ██╔██╗ ██║██║ ██║ ╚███╔╝ + ██║╚██╗██║██║ ██║ ██╔██╗ + ██║ ╚████║╚██████╔╝██╔╝ ██╗ + ╚═╝ ╚═══╝ ╚═════╝ ╚═╝ ╚═╝ +``` + +**Cyber Threat Intelligence Framework** + +[![Status](https://img.shields.io/badge/Status-v1.0.0-success)](https://github.com/nox-project/nox-framework/releases/tag/v1.0.0) +[![Python](https://img.shields.io/badge/Python-3.8%2B-blue?logo=python&logoColor=white)](https://www.python.org/) +[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE.txt) +[![Kali Linux](https://img.shields.io/badge/Kali%20Linux-Ready-557C94?logo=kalilinux&logoColor=white)](https://www.kali.org/) +[![Platform](https://img.shields.io/badge/Platform-Linux%20%7C%20macOS%20%7C%20Windows-lightgrey)](https://github.com/nox-project/nox-framework) +[![Sources](https://img.shields.io/badge/Sources-124-red)](https://github.com/nox-project/nox-framework) + +*OSINT framework for red teaming, digital forensics, and corporate exposure analysis.* + +
+ +--- + +## Introduction + +NOX is a purpose-built cyber threat intelligence engine designed for operators who require speed, operational security, and depth in a single cohesive framework. It is not a wrapper around existing tools — it is a fully async, plugin-driven intelligence platform with a strict separation between execution logic and source definitions. + +| Capability | Detail | +|-|-| +| ⚡ **Async Execution Engine** | Massively parallel scanning across 124 intelligence feeds with no sequential bottlenecks and no blocking I/O. | +| 🛡️ **Guardian Engine** | Integrated OPSEC layer with automatic proxy rotation and SOCKS5 support. Fail-safe kill-switch halts all traffic if the transport circuit is unavailable. | +| 🧠 **Risk Scoring** | Dynamic 0–100 scoring with time-decay, source confidence weighting, password complexity analysis, persistence multipliers, and HVT detection. | +| 🔗 **Recursive Avalanche Engine** | Every discovered asset — username, email, cracked password, phone — is automatically re-injected as a new scan seed. Per-asset pipeline runs sequentially (breach → crack → dork → scrape); child assets run concurrently. Identifiers from all four phases feed the pivot queue. Global deduplication and configurable depth cap prevent runaway recursion. | +| 🔍 **Autoscan** | Single command triggers breach scan + recursive pivot + dorking + paste scraping — fully automated, no manual chaining. | + +--- + +## Features + +| Feature | Description | +|-|-| +| **124 JSON Plugin Sources** | Every intelligence source is a JSON plugin. The execution engine contains zero hardcoded source logic. | +| **Async Core** | Full `asyncio` event loop with JA3 fingerprinting, SSL session management, per-request jitter, and configurable concurrency. | +| **Autoscan Pipeline** | `--autoscan` triggers: breach scan → recursive pivot → Google/Bing/DDG dorking → paste/Telegram scraping — all in one command. 
| +| **Recursive Avalanche Engine** | Every identifier discovered — from breach records, dork hits, or scraped paste/Telegram content — is re-injected as a new seed. Per-asset pipeline is sequential (breach → crack → dork → scrape); child assets run concurrently via `asyncio.gather`. A global `seen_assets` set prevents infinite loops. Concurrency and depth are fully configurable at runtime via `--threads` and `--depth`. | +| **Hash Pivoting** | Hashes found in breach data are automatically identified (MD5/SHA1/SHA256/NTLM/bcrypt) and cracked via concurrent background API queries. Cracked plaintexts are injected into the pivot queue as password-recycling seeds. Failures are logged silently — the scan never stops. | +| **Guardian Proxy Engine** | Zero-config OPSEC layer: reads `proxies.txt` if present; otherwise auto-fetches and validates a high-anonymity proxy pool in-memory. Full SOCKS5/HTTP/S and Tor support. | +| **API Key Rotation** | `api_key_slots` per source — NOX round-robins across multiple keys to bypass per-key rate limits. | +| **Identity Graphing** | Union-Find correlation engine unifies breach records into identity clusters across all sources, using type-aware pivot classification. | +| **Enterprise Forensic Reports** | Professional PDF/HTML/JSON/CSV/Markdown reports with Executive Summary dashboard (Total Time, Nodes Discovered, Cleartext Passwords, Pivot Depth), interactive Pivot Chain Visualization, and strict data sanitization — no technical noise in output. JSON exports are self-describing with a full metadata block. | +| **HVT Detection** | Auto-flags C-level, Admin, DevOps, and government domain accounts as High-Value Targets. | +| **Dorking Engine** | Passive document discovery via Google/Bing/DDG dorks with PDF/Office metadata extraction. | +| **Scraping Engine** | Paste site indexing, Telegram CTI channel monitoring, credential extraction, and misconfiguration discovery. Each autoscan asset gets a dedicated scrape session — no shared state. 
| +| **Proxy / Tor** | SOCKS5, HTTP/S proxy, full Tor routing via `stem`, and automatic Guardian fallback. SOCKS5 proxies are validated and routed correctly via `aiohttp-socks`. | +| **Secure Key Store** | API keys managed via `~/.config/nox-cli/apikeys.json` (chmod 0600). Unconfigured keys are silently skipped. Keys set via environment variable are picked up automatically without restarting. | +| **System Logging** | All scan events, phase completions, pivot discoveries, API events, rate-limits, and crack attempts are written to `~/.nox/logs/nox.log`. Only actionable intelligence reaches the terminal. | +| **Plugin Debug** | `--list-sources` prints a full operator debug table: plugin name, input type, confidence score, key status (configured / not configured / public), and any JSON parse errors. | + +--- + +## Architecture + +### Plugin-Driven Design + +NOX operates on a strict separation of concerns: `nox.py` is a **pure, agnostic execution engine** — it handles async I/O, JA3 fingerprinting, SSL session management, recursive pivoting, and result correlation. It contains no hardcoded intelligence logic. + +All intelligence is defined as **JSON plugins** in `sources/`. These plugins are the sole source of truth for what NOX queries, how it authenticates, and what it extracts. The build tool `build_sources.py` is the only authorised way to create or modify them. + +``` +build_sources.py ──► sources/*.json ──► nox.py (runtime loader) + [Builder] [Plugins] [Execution Engine] +``` + +> [!IMPORTANT] +> **`sources/*.json` files are auto-generated artifacts. Never edit them directly.** +> All source additions and modifications must be made in `build_sources.py` and applied by running `python build_sources.py`. Manual edits will be overwritten on the next build. 
+ +#### Source Schema + +```json +{ + "name": "MyPrivateDB", + "endpoint": "https://api.myprivatedb.com/search?q={target}", + "method": "GET", + "headers": { "Authorization": "Bearer {MY_API_KEY}" }, + "regex_pattern": "([\\w.+-]+@[\\w-]+\\.[\\w.]+):([\\S]+)", + "required_api_key_name": "MY_API_KEY", + "api_key_slots": ["{MY_API_KEY}"], + "input_type": "email", + "output_type": ["username", "ip"], + "pivot_types": ["email", "username"], + "confidence": 0.9 +} +``` + +Supported fields: `name`, `endpoint`, `method`, `headers`, `regex_pattern` (or `json_root` + `normalization_map`), `required_api_key_name`, `api_key_slots`, `input_type`, `output_type`, `pivot_types`, `confidence`. + +--- + +### Autoscan Pipeline + +`--autoscan` (CLI) / `autoscan` (REPL) executes the full intelligence pipeline in a single command: + +``` +For each asset (seed + every discovered identifier): + ├─ Phase 1 — Breach Scan + │ 124 sources queried in parallel (async) + │ + ├─ Phase 2 — Hash Crack (non-blocking, concurrent) + │ Hashes found in breach data → rainbow-table APIs → cracked plaintext + │ → password-recycling breach scan + │ + ├─ Phase 3 — Dorking + │ Google/Bing/DDG dorks → leaked docs, .env files, SQL dumps + │ → new identifiers extracted and re-injected + │ + └─ Phase 4 — Scraping + Pastebin, IntelX, Telegram CTI channels → credential extraction + → new identifiers extracted and re-injected + +All identifiers discovered in phases 1–4 are re-injected as new seeds. +Child assets are processed concurrently via asyncio.gather. +``` + +`scan` (without `--autoscan`) runs Phase 1 only — breach sources, no pivot/dork/scrape. + +--- + +### Recursive Avalanche Engine + +Every identifier discovered during a scan — from breach records, dork hits, or scraped paste/Telegram content — is treated as a new intelligence seed. For each asset, the engine runs four phases sequentially: breach scan → hash crack → dork → scrape. 
Identifiers extracted from **all four phases** are harvested and re-injected as new seeds. Child assets are then processed concurrently via `asyncio.gather`. + +``` +target@company.com + └─► [Breach] username: j.doe ──► [Breach + Crack + Dork + Scrape] + │ └─► github.com/jdoe ──► [Breach + Crack + Dork + Scrape] + └─► [Breach] hash: 5f4dcc... ──► [AutoCrack] → "password123" + │ └─► [Breach] password-recycling scan across all sources + └─► [Dork] new@email.com ──► [Breach + Crack + Dork + Scrape] + └─► [Scrape/paste] admin@corp.com ──► [Breach + Crack + Dork + Scrape] +``` + +- **`seen_assets` set** — global deduplication; no identifier is ever processed twice, regardless of which phase discovered it +- **Global semaphore** — single shared concurrency cap across the entire discovery tree, respecting `--threads` +- **`--depth N`** — configurable pivot depth (default: 2); hard backstop prevents runaway recursion +- **`--no-pivot`** — disable recursive enrichment for a fast breach-only scan + +--- + +### Hash Pivoting + +When a hash is found in breach data during `--autoscan`: + +1. Hash type is identified (MD5/NTLM, SHA1, SHA256, bcrypt) +2. Multiple rainbow-table APIs are queried **concurrently** in a background task +3. **If cracked** — plaintext is logged, the record is updated, and the password is injected into the pivot queue for password-recycling analysis across all breach sources +4. **If not cracked** — failure is logged to `nox_system.log`, the hash is preserved in the report, and pivoting on all other assets continues immediately + +The crack process is fully non-blocking. A timeout or API failure never pauses the scan. Use `--no-online-crack` to restrict cracking to the local wordlist only (no data sent to third-party APIs). + +--- + +### Guardian Proxy Engine + +The Guardian Engine is NOX's zero-config OPSEC layer. It activates automatically when no `--proxy` or `--tor` flag is supplied. + +**Resolution order:** + +1. 
**`proxies.txt`** — if present in the working directory, NOX loads and rotates through the listed proxies. +2. **Dynamic fetch** — if `proxies.txt` is absent, the Guardian Engine fetches a fresh list of high-anonymity public proxies, validates each one, and holds the validated pool in-memory for the session. Nothing is written to disk. +3. **Direct connection** — if no valid proxies are found, NOX falls back to a direct connection and emits a warning. + +> [!WARNING] +> Public proxy pools are inherently untrusted infrastructure. For sensitive engagements, always supply a controlled proxy via `--proxy` or route through Tor via `--tor`. + +| Flag | Behaviour | +|-|-| +| `--proxy URL` | Route all traffic through the specified HTTP/S or SOCKS5 proxy. Disables Guardian. | +| `--tor` | Route all traffic through Tor (requires `tor` service on port 9050). Disables Guardian. | +| `--guardian-off` | Bypass the OPSEC kill-switch and connect directly. | +| *(no flag)* | Guardian Engine activates automatically. | + +--- + +### Reporting + +All report formats include an **Executive Summary dashboard**: + +| Metric | Description | +|-|-| +| Total Time | Wall-clock duration of the full scan | +| Nodes Discovered | Unique identities surfaced across all sources | +| Cleartext Passwords | Plaintext credentials found or cracked | +| Pivot Depth | Depth reached by the recursive avalanche engine | + +Reports also include a **Pivot Chain Visualization** showing the full relational path from initial seed to final discovery: + +``` +[seed@corp.com] -> [LeakA / username:jdoe] -> [Dork: leaked .env] -> [new@email.com] +``` + +JSON exports include a `_meta` block with `scan_id`, `target`, `timestamp`, `nox_version`, and `pivot_depth_reached` — making every export self-describing for ingestion into case management platforms. + +All output is sanitized — proxy errors, timeouts, and tracebacks are stripped. Only actionable intelligence is included. 
+ +--- + +## Filesystem Layout + +``` +~/.nox/ +├── sources/ # Auto-generated JSON source plugins +├── reports/ # Generated forensic reports +├── logs/ # Runtime log (nox.log) +├── wordlists/ # Hash cracking wordlists +├── vault/ # Secure storage +└── nox_cache.db # Forensic persistence database (SQLite) + +~/.config/nox-cli/ +├── apikeys.json # API keys — chmod 0600, never committed to VCS +└── logs/ + └── nox_system.log # Silent system log: API events, rate-limits, crack attempts + +# .deb install (isolated venv) +/opt/nox-cli/ +├── nox.py +├── build_sources.py +├── requirements.txt +├── sources/ +└── .venv/ # Isolated Python environment (PEP 668 compliant) +``` + +--- + +## Prerequisites + +- **Python 3.8+** +- **pip** (`python3-pip` on Debian/Kali) +- **Tor** *(optional)* — required only for `--tor`. On Kali: `sudo apt install tor -y`. The `tor` service must be running on port `9050`. + +--- + +## Installation + +### Option 1: Debian / Kali Linux — Isolated .deb (Recommended) + +Download the `.deb` package from the [Releases page](https://github.com/nox-project/nox-framework/releases), then run: + +```bash +sudo dpkg -i nox-cli_*_all.deb +nox-cli --help +``` + +The post-install script automatically: +1. Creates an isolated virtual environment at `/opt/nox-cli/.venv` +2. Installs all Python dependencies inside the venv (PEP 668 compliant — zero system pollution) +3. Builds the 124 source plugins +4. Links `/usr/bin/nox-cli` → `/opt/nox-cli/nox-wrapper.sh` + +### Option 2: From Source + +```bash +git clone https://github.com/nox-project/nox-framework.git +cd nox-framework +pip install -r requirements.txt +python build_sources.py +python3 nox.py +``` + +--- + +## Quick Start + +**Step 1 — Build source plugins** *(from source only — .deb does this automatically)* + +```bash +python build_sources.py +``` + +**Step 2 — Configure API keys** + +`build_sources.py` creates `~/.config/nox-cli/apikeys.json` on first run, pre-populated with every supported service. 
The file is `chmod 0600` and is never committed to VCS. + +This is the **single canonical key store** — all sources read from it at runtime. + +```bash +# Edit the file directly +nano ~/.config/nox-cli/apikeys.json + +# Or inspect plugin status and key configuration +nox-cli --list-sources +``` + +> [!NOTE] +> Any key set to `INSERT_API_KEY_HERE` or `""` is treated as unconfigured — that source is silently skipped. Sources without a key requirement are always active. +> +> **Load priority:** environment variable (e.g. `export HIBP_API_KEY=xxx`) → `~/.config/nox-cli/apikeys.json` + +**Step 3 — Execute** + +> [!NOTE] +> **OPSEC Kill-Switch:** By default, NOX activates the Guardian Engine (auto proxy rotation). Use `--guardian-off` to connect directly. + +```bash +# Breach scan — input type auto-detected (email / domain / ip / username / hash / phone) +nox-cli -t target@company.com + +# Full autoscan: breach + recursive pivot + dork + scrape +nox-cli -t target@company.com --autoscan + +# Autoscan with Tor routing +nox-cli -t target@company.com --autoscan --tor + +# Autoscan with SOCKS5 proxy + PDF report +nox-cli -t target@company.com --autoscan --proxy socks5://127.0.0.1:1080 -o report.pdf --format pdf + +# Autoscan with custom pivot depth +nox-cli -t target@company.com --autoscan --depth 3 + +# Breach scan only — no pivot, no dork, no scrape +nox-cli -t target@company.com --no-pivot + +# Domain scan +nox-cli -t company.com + +# Hash identification and cracking +nox-cli --crack 5f4dcc3b5aa765d61d8327deb882cf99 + +# Hash cracking — local wordlist only, no third-party API calls +nox-cli --crack 5f4dcc3b5aa765d61d8327deb882cf99 --no-online-crack + +# Password strength analysis +nox-cli --analyze "P@ssw0rd123" + +# Google dorking +nox-cli --dork target@company.com + +# Paste / Telegram scraping +nox-cli --scrape target@company.com + +# Compare scan against last cached result — show only new findings +nox-cli -t target@company.com --diff + +# Plugin debug: loaded sources, 
input types, confidence, key status +nox-cli --list-sources + +# Force resync of source plugins from package +nox-cli --reset-sources +``` + +--- + +## CLI Reference + +``` +usage: nox-cli [-h] [-t TARGET] [-i] [--version] + [--autoscan] [--fullscan] [--no-pivot] [--depth N] + [--dork TARGET] [--scrape TARGET] + [--crack HASH] [--no-online-crack] + [--analyze PASS] [--list-sources] [--reset-sources] + [--tor] [--proxy URL] [--guardian-off] [--allow-leak] + [--threads N] [--timeout N] + [-o FILE] [--format {json,csv,html,md,pdf}] + [--diff] + + -t, --target TARGET Target to scan (auto-detected type) + -i, --interactive Launch interactive REPL + --version Show version and exit + --autoscan Full pipeline: breach + pivot + dork + scrape + --fullscan Breach + pivot only (no dork/scrape) + --no-pivot Disable recursive pivot enrichment + --depth N Avalanche pivot depth (default: 2) + --dork TARGET Google/Bing/DDG dorking for leaked documents + --scrape TARGET Paste site + Telegram scraping + --crack HASH Identify and crack a hash + --no-online-crack Local wordlist only — no data sent to third-party APIs + --analyze PASS Deep password strength analysis + --list-sources Plugin debug: input type, confidence, key status + --reset-sources Force resync of source plugins from package + --tor Route all traffic through Tor (port 9050) + --proxy URL HTTP/S or SOCKS5 proxy URL + --guardian-off Bypass OPSEC kill-switch (direct connection) + --allow-leak Allow direct connection if proxy/Tor is unavailable + --threads N Concurrency limit (default: 20) + --timeout N Request timeout in seconds (default: 15) + -o, --output FILE Output file path + --format FORMAT Output format: json, csv, html, md, pdf + --diff Show only new findings vs last cached scan +``` + +--- + +## REPL + +Launch the interactive REPL with no arguments: + +```bash +nox-cli +``` + +``` +Command Description +----------- --------------------------------------------------------------- +autoscan Full pipeline: breach + 
pivot + dork + scrape +scan Breach intelligence scan only +dork Google/Bing/DDG dorking for leaked documents +scrape Paste site + Telegram scraping +crack Identify and crack a hash +analyze Deep password strength analysis +graph ASCII identity graph of last scan +visualize ASCII relationship map (Target → Data → Pivots) +pivot Re-scan using result #n as new pivot seed +search Filter in-memory records by keyword +sources Plugin debug: input type, confidence, key status +export Export results (json / csv / html / md / pdf) +tor Toggle Tor routing on/off +proxy Set or clear proxy URL +config Configure threads / timeout / depth +help Show this menu +quit Exit NOX +``` + +**Examples:** + +``` +nox> autoscan target@company.com +nox> graph +nox> visualize +nox> pivot 3 +nox> search admin +nox> export pdf investigation.pdf +nox> sources +nox> config threads 30 +nox> config depth 3 +nox> proxy socks5://127.0.0.1:1080 +nox> tor +``` + +--- + +## Source Management + +### Adding a Source + +**1. Define in `build_sources.py`:** + +```python +_auth("NewIntelDB", "breaches", + "https://api.newinteldb.com/v1/search?q={target}", "GET", + {"results": "$.results"}, + headers={"X-API-Key": "{NEWINTELDB_API_KEY}"}, + api_key_slots=["{NEWINTELDB_API_KEY}"], + normalization_map={"email": "email", "password": "password"}, + input_type="email", + output_type=["username", "ip"], + confidence=0.85) +``` + +**2. Rebuild:** + +```bash +python build_sources.py +``` + +> [!NOTE] +> The builder validates every source at build time: GET endpoints must contain `{target}`, volatile sources must have `reliability_score ≤ 4`, and the `confidence` field can be set explicitly to override the formula-derived value. 
+ +--- + +## Building the .deb Package + +```bash +gem install fpm +bash build_deb.sh +sudo dpkg -i dist/nox-cli_*_all.deb +``` + +--- + +## Legal Disclaimer + +> [!WARNING] +> **NOX is intended exclusively for:** +> - Authorised penetration testing and red team engagements with explicit written consent +> - Corporate exposure analysis on assets you own or are contracted to assess +> - Digital forensics and incident response +> - Academic and security research in controlled, isolated environments +> +> **Unauthorised use of this tool against systems, networks, or individuals without explicit written permission is a criminal offence** under the Computer Fraud and Abuse Act (CFAA, 18 U.S.C. § 1030), the Computer Misuse Act 1990 (CMA), and equivalent legislation in all major jurisdictions worldwide. +> +> The authors and contributors of NOX accept **no liability** for any direct, indirect, incidental, or consequential damages arising from misuse of this software. By downloading, installing, or executing NOX, you unconditionally agree to comply with all applicable local, national, and international laws, and to only target systems and data for which you hold explicit, documented authorisation. +> +> **If you do not agree to these terms, do not use this software.** + +--- + +## License + +[Apache License 2.0](LICENSE.txt) diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..45d20b5 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,34 @@ +# Security Policy + +## Supported Versions + +| Version | Supported | +|---------|-----------| +| 1.0.x | ✅ Active | + +## Reporting a Vulnerability + +Report security vulnerabilities **privately** — do not open a public issue. + +**Contact:** open a [GitHub Security Advisory](https://github.com/nox-project/nox-framework/security/advisories/new) + +Include: +- A clear description of the vulnerability +- Steps to reproduce +- Potential impact +- Suggested fix (if any) + +You will receive an acknowledgement within 48 hours. 
#!/usr/bin/env bash
set -e

# NOX v1.0.0 — Debian package builder (thin wrapper around FPM).
# Prerequisite: fpm, installable with `gem install fpm`.

VERSION="1.0.0"
PKG_NAME="nox-cli"
ARCH="all"
OUT_DIR="dist"

# File name of the package; it is written inside $OUT_DIR.
DEB_FILE="${PKG_NAME}_${VERSION}_${ARCH}.deb"

# Stop early, with an install hint on stderr, when fpm is not on PATH.
if ! command -v fpm >/dev/null 2>&1; then
    echo "[!] fpm not found: gem install fpm" >&2
    exit 1
fi

mkdir -p "$OUT_DIR"
echo "[*] Building ${DEB_FILE} ..."

# Options first, then the source=destination path mappings, in the same
# order the command line has always used.
fpm_args=(
    --input-type dir
    --output-type deb
    --name "$PKG_NAME"
    --version "$VERSION"
    --architecture "$ARCH"
    --maintainer "nox-project "
    --description "NOX — Cyber Threat Intelligence Framework — 120+ async breach sources, pivot engine, HVT detection"
    --url "https://github.com/nox-project/nox-framework"
    --license "Apache-2.0"
    --depends "python3"
    --depends "python3-venv"
    --depends "python3-pip"
    --after-install postinst.sh
    --package "${OUT_DIR}/${DEB_FILE}"
    --force
    nox.py=/opt/nox-cli/nox.py
    build_sources.py=/opt/nox-cli/build_sources.py
    requirements.txt=/opt/nox-cli/requirements.txt
    sources/=/opt/nox-cli/sources/
    sources/helpers/=/opt/nox-cli/sources/helpers/
    nox-wrapper.sh=/opt/nox-cli/nox-wrapper.sh
    docs/nox-cli.1=/usr/share/man/man1/nox-cli.1
)

fpm "${fpm_args[@]}"

echo "[+] Built: ${OUT_DIR}/${DEB_FILE}"
class SourceConfig(BaseModel):
    """Schema for a single intelligence-source plugin definition.

    Construction runs the validators below; ``to_json`` renders the
    validated definition as the JSON text of a plugin file.
    """

    # ── Mandatory core ──
    name: str
    category: str
    endpoint: str
    method: HttpMethod
    requires_auth: bool
    selectors: Dict[str, str]

    # ── Request plumbing ──
    rate_limit: float = 1.0
    headers: Dict[str, str] = Field(default_factory=dict)
    payload_template: Optional[Dict[str, Any]] = None
    api_key_slots: List[str] = Field(default_factory=list)

    # ── Typing & pivoting ──
    input_type: InputType = "any"
    output_type: List[str] = Field(default_factory=list)
    normalization_map: Dict[str, str] = Field(default_factory=dict)
    tags: List[str] = Field(default_factory=list)

    # ── Resilience ──
    health_check_url: str
    expected_status: int = 200
    reliability_score: ReliabilityScore = 5
    is_volatile: Optional[bool] = None            # left out of the JSON when False
    bypass_required: Optional[List[str]] = None   # left out of the JSON when empty
    user_agent_type: Optional[str] = None         # left out of the JSON when absent
    backup_endpoints: List[str] = Field(default_factory=list)
    # H2: explicit confidence override; when set it wins over the derived value
    confidence: Optional[float] = None

    @field_validator("reliability_score")
    @classmethod
    def _score_range(cls, v: int) -> int:
        """Return *v* unchanged, or raise ``ValueError`` unless 1 <= v <= 5."""
        within_bounds = 1 <= v <= 5
        if within_bounds:
            return v
        raise ValueError("reliability_score must be 1–5")

    @model_validator(mode="after")
    def _validate_source(self) -> "SourceConfig":
        """Apply the cross-field build rules and return the model itself.

        Raises:
            ValueError: for a GET endpoint without ``{target}``, or for a
                volatile source whose ``reliability_score`` exceeds 4.
        """
        # H1: GET endpoints must carry the {target} placeholder.
        uses_get = self.method.upper() == "GET"
        if uses_get and "{target}" not in self.endpoint:
            raise ValueError(
                f"'{self.name}': GET endpoint must contain {{target}} placeholder: {self.endpoint!r}"
            )

        # L3: a volatile source is capped at reliability_score 4.
        if self.is_volatile and self.reliability_score > 4:
            raise ValueError(
                f"'{self.name}': is_volatile sources must have reliability_score ≤ 4"
            )

        return self

    def to_json(self) -> str:
        """Serialise the source to a JSON string indented by four spaces.

        ``None`` fields are excluded, and the three optional resilience
        keys are removed whenever their value is falsy. ``confidence`` is
        always written: the explicit override when one was given,
        otherwise ``0.4 + (reliability_score - 1) * 0.15``; either way
        rounded to two decimals.
        """
        payload = self.model_dump(exclude_none=True)

        for optional_key in ("is_volatile", "bypass_required", "user_agent_type"):
            if optional_key in payload and not payload[optional_key]:
                del payload[optional_key]

        # H2: the explicit override takes precedence over the formula.
        if self.confidence is None:
            score = 0.4 + (self.reliability_score - 1) * 0.15
        else:
            score = self.confidence
        payload["confidence"] = round(score, 2)

        return json.dumps(payload, indent=4)
# ---------------------------------------------------------------------------
# Builder helpers  (_base → requires_auth=False, _auth → requires_auth=True)
# ---------------------------------------------------------------------------

def _mk(
    name: str, category: str, endpoint: str, method: HttpMethod,
    selectors: Dict[str, str], *,
    requires_auth: bool,
    rate_limit: float = 1.0,
    headers: Optional[Dict[str, str]] = None,
    payload_template: Optional[Dict[str, Any]] = None,
    api_key_slots: Optional[List[str]] = None,
    input_type: InputType = "any",
    output_type: Optional[List[str]] = None,
    normalization_map: Optional[Dict[str, str]] = None,
    tags: Optional[List[str]] = None,
    health_check_url: Optional[str] = None,
    expected_status: int = 200,
    reliability_score: ReliabilityScore = 5,
    is_volatile: bool = False,
    bypass_required: Optional[List[str]] = None,
    user_agent_type: Optional[str] = None,
    backup_endpoints: Optional[List[str]] = None,
    confidence: Optional[float] = None,
) -> SourceConfig:
    """Build a ``SourceConfig`` from positional core fields plus keyword options.

    Omitted or empty container arguments are replaced by fresh empty
    containers. A falsy ``is_volatile`` / ``bypass_required`` is passed on
    as ``None``. When ``health_check_url`` is not supplied it is derived
    from ``endpoint``: the text before the first ``{`` with trailing ``/``
    and ``?`` characters removed.

    Args:
        confidence: Optional explicit confidence value forwarded to
            ``SourceConfig.confidence``. ``None`` (the default) leaves the
            field unset, exactly as before this parameter existed.

    Returns:
        The constructed ``SourceConfig``.
    """
    return SourceConfig(
        name=name, category=category, endpoint=endpoint, method=method,
        requires_auth=requires_auth, selectors=selectors,
        rate_limit=rate_limit,
        headers=headers or {},
        payload_template=payload_template,
        api_key_slots=api_key_slots or [],
        input_type=input_type,
        output_type=output_type or [],
        normalization_map=normalization_map or {},
        tags=tags or [],
        health_check_url=health_check_url or endpoint.split("{")[0].rstrip("/?"),
        expected_status=expected_status,
        reliability_score=reliability_score,
        is_volatile=is_volatile or None,
        bypass_required=bypass_required or None,
        user_agent_type=user_agent_type,
        backup_endpoints=backup_endpoints or [],
        # Previously dropped: without this pass-through, confidence=... on
        # _base/_auth raised TypeError instead of reaching the model.
        confidence=confidence,
    )


def _base(name, category, endpoint, method, selectors, **kw) -> SourceConfig:
    """Build a source that needs no credentials (``requires_auth=False``)."""
    return _mk(name, category, endpoint, method, selectors, requires_auth=False, **kw)


def _auth(name, category, endpoint, method, selectors, **kw) -> SourceConfig:
    """Build a source that needs credentials (``requires_auth=True``)."""
    return _mk(name, category, endpoint, method, selectors, requires_auth=True, **kw)
"ip"], + tags=["passive", "threat"], + health_check_url="https://otx.alienvault.com", reliability_score=5), + + _base("alienvault_otx_ip", "threat_intel", + "https://otx.alienvault.com/api/v1/indicators/IPv4/{target}/general", "GET", + {"asn": "$.asn", "country": "$.country_name"}, + input_type="ip", output_type=["domain"], + tags=["passive", "threat"], + health_check_url="https://otx.alienvault.com", reliability_score=5), + + _base("alienvault_otx_malware", "threat_intel", + "https://otx.alienvault.com/api/v1/indicators/file/{target}/analysis", "GET", + {"malware": "$.analysis.malware"}, + input_type="hash", output_type=["hash"], + tags=["passive", "threat"], + health_check_url="https://otx.alienvault.com", reliability_score=5), + + _base("alienvault_otx_user", "social", + "https://otx.alienvault.com/api/v1/users/{target}/general", "GET", + {"pulses": "$.pulse_count"}, + input_type="username", output_type=["username"], + tags=["passive"], + health_check_url="https://otx.alienvault.com", reliability_score=5), + + _base("urlscan_search", "url_analysis", + "https://urlscan.io/api/v1/search/?q={target}", "GET", + {"urls": "$.results[*].page.url"}, + rate_limit=2.0, + input_type="domain", output_type=["url", "ip", "domain"], + tags=["passive"], + health_check_url="https://urlscan.io", reliability_score=5), + + _base("threatcrowd_email", "threat_intel", + "https://www.threatcrowd.org/searchApi/v2/email/report/?email={target}", "GET", + {"domains": "$.domains"}, + rate_limit=5.0, + input_type="email", output_type=["domain"], + tags=["passive", "threat"], + health_check_url="https://www.threatcrowd.org", reliability_score=3, + is_volatile=True, bypass_required=["cloudflare"], user_agent_type="browser"), + + _base("threatcrowd_domain", "threat_intel", + "https://www.threatcrowd.org/searchApi/v2/domain/report/?domain={target}", "GET", + {"ips": "$.resolutions[*].ip_address"}, + rate_limit=5.0, + input_type="domain", output_type=["ip"], + tags=["passive", "threat"], + 
health_check_url="https://www.threatcrowd.org", reliability_score=3, + is_volatile=True, bypass_required=["cloudflare"], user_agent_type="browser"), + + _base("pulsedive", "threat_intel", + "https://pulsedive.com/api/info.php?indicator={target}", "GET", + {"risk": "$.risk", "threats": "$.threats"}, + rate_limit=2.0, + input_type="any", output_type=["domain", "ip"], + tags=["passive", "threat"], + health_check_url="https://pulsedive.com", reliability_score=4), + + _base("hudsonrock_osint", "breach_data", + "https://cavalier.hudsonrock.com/api/json/v2/osint-tools/search-by-login?username={target}", "GET", + {"stealers": "$.stealers"}, + input_type="username", output_type=["email", "domain"], + normalization_map={"stealers": "breach_record"}, + tags=["passive", "stealth"], + health_check_url="https://cavalier.hudsonrock.com", reliability_score=4), + + _base("ipinfo_io", "geolocation", + "https://ipinfo.io/{target}/json", "GET", + {"org": "$.org", "city": "$.city"}, + input_type="ip", output_type=["domain"], + normalization_map={"org": "asn_org", "city": "geo_city"}, + tags=["passive", "fast"], + health_check_url="https://ipinfo.io", reliability_score=5), + + _base("ipapi_co", "geolocation", + "https://ipapi.co/{target}/json/", "GET", + {"asn": "$.asn", "org": "$.org"}, + headers={"User-Agent": "Mozilla/5.0"}, + input_type="ip", output_type=["domain"], + normalization_map={"asn": "asn_number", "org": "asn_org"}, + tags=["passive", "fast"], + health_check_url="https://ipapi.co", reliability_score=4), + + _base("bgpview_ip", "network", + "https://api.bgpview.io/ip/{target}", "GET", + {"prefixes": "$.data.prefixes[*].prefix"}, + input_type="ip", output_type=["ip"], + tags=["passive", "infrastructure"], + health_check_url="https://api.bgpview.io", reliability_score=4), + + _base("emailrep_io", "email_rep", + "https://emailrep.io/{target}", "GET", + {"reputation": "$.reputation"}, + rate_limit=2.0, + input_type="email", output_type=["email"], + 
normalization_map={"reputation": "email_reputation"}, + tags=["passive", "fast"], + health_check_url="https://emailrep.io", reliability_score=4), + + _base("github_users", "social", + "https://api.github.com/users/{target}", "GET", + {"bio": "$.bio", "blog": "$.blog"}, + rate_limit=2.0, headers={"User-Agent": "NOX"}, + input_type="username", output_type=["username", "domain"], + tags=["passive", "fast"], + health_check_url="https://api.github.com", reliability_score=5), + + _base("reddit_user", "social", + "https://www.reddit.com/user/{target}/about.json", "GET", + {"karma": "$.data.total_karma"}, + rate_limit=2.0, headers={"User-Agent": "NOX"}, + input_type="username", output_type=["username"], + tags=["passive"], + health_check_url="https://www.reddit.com", reliability_score=4), + + _base("gravatar", "social", + "https://www.gravatar.com/{target}.json", "GET", + {"name": "$.entry[0].displayName"}, + rate_limit=2.0, + input_type="email", output_type=["username"], + tags=["passive"], + health_check_url="https://www.gravatar.com", reliability_score=4), + + _base("anubis_subdomains", "dns_recon", + "https://jldc.me/anubis/subdomains/{target}", "GET", + {"subdomains": "$.*"}, + input_type="domain", output_type=["domain"], + tags=["passive"], + health_check_url="https://jldc.me", reliability_score=3, is_volatile=True), + + _base("sublist3r_api", "dns_recon", + "https://api.sublist3r.com/search.php?domain={target}", "GET", + {"subdomains": "$.*"}, + input_type="domain", output_type=["domain"], + tags=["passive"], + health_check_url="https://api.sublist3r.com", reliability_score=3, is_volatile=True), + + _base("keybase_lookup", "social", + "https://keybase.io/_/api/1.0/user/lookup.json?username={target}", "GET", + {"id": "$.them[0].id"}, + input_type="username", output_type=["username"], + tags=["passive"], + health_check_url="https://keybase.io", reliability_score=4), + + _base("keybase_proofs", "social", + 
"https://keybase.io/_/api/1.0/user/lookup.json?usernames={target}", "GET", + {"proofs": "$.them[0].proofs_summary.all[*].namestr"}, + input_type="username", output_type=["username"], + tags=["passive"], + health_check_url="https://keybase.io", reliability_score=4), + + _base("maltiverse_ip", "threat_intel", + "https://api.maltiverse.com/ip/{target}", "GET", + {"classification": "$.classification"}, + input_type="ip", output_type=["ip"], + tags=["passive", "threat"], + health_check_url="https://api.maltiverse.com", reliability_score=3), + + _base("threatminer_domain", "threat_intel", + "https://api.threatminer.org/v2/domain.php?q={target}&rt=1", "GET", + {"ips": "$.results"}, + input_type="domain", output_type=["ip"], + tags=["passive", "threat"], + health_check_url="https://api.threatminer.org", reliability_score=3, is_volatile=True), + + _base("threatminer_ip", "threat_intel", + "https://api.threatminer.org/v2/host.php?q={target}&rt=1", "GET", + {"urls": "$.results"}, + input_type="ip", output_type=["url"], + tags=["passive", "threat"], + health_check_url="https://api.threatminer.org", reliability_score=3, is_volatile=True), + + _base("robtex_ip", "network", + "https://freeapi.robtex.com/ipquery/{target}", "GET", + {"as": "$.asname"}, + input_type="ip", output_type=["domain"], + tags=["passive", "fast"], + health_check_url="https://freeapi.robtex.com", reliability_score=4), + + _base("wayback_machine", "archive", + "https://archive.org/wayback/available?url={target}", "GET", + {"snapshot": "$.archived_snapshots.closest.url"}, + input_type="url", output_type=["url"], + tags=["passive"], + health_check_url="https://archive.org", reliability_score=5), + + _base("ipvigilante", "geolocation", + "https://ipvigilante.com/json/{target}", "GET", + {"city": "$.data.city_name"}, + input_type="ip", output_type=["ip"], + tags=["passive"], + health_check_url="https://ipvigilante.com", reliability_score=3, is_volatile=True), + + _base("pypi_user", "social", + 
"https://pypi.org/pypi/{target}/json", "GET", + {"info": "$.info"}, + input_type="username", output_type=["username"], + tags=["passive"], + health_check_url="https://pypi.org", reliability_score=5), + + _base("npm_user", "social", + "https://registry.npmjs.org/-/v1/search?text=maintainer:{target}", "GET", + {"packages": "$.objects[*].package.name"}, + input_type="username", output_type=["username"], + tags=["passive"], + health_check_url="https://registry.npmjs.org", reliability_score=5), + + _base("gitlab_search", "social", + "https://gitlab.com/api/v4/users?username={target}", "GET", + {"id": "$.[*].id"}, + input_type="username", output_type=["username"], + tags=["passive"], + health_check_url="https://gitlab.com", reliability_score=5), + + _base("hackernews_user", "social", + "https://hacker-news.firebaseio.com/v0/user/{target}.json", "GET", + {"karma": "$.karma"}, + input_type="username", output_type=["username"], + tags=["passive", "fast"], + health_check_url="https://hacker-news.firebaseio.com", reliability_score=5), + + _base("scamwatcher", "threat_intel", + "https://www.scamwatcher.com/scam/search?q={target}", "GET", + {"results": "text_lines"}, + headers={"User-Agent": "Mozilla/5.0"}, + input_type="any", output_type=["domain"], + tags=["passive", "threat"], + health_check_url="https://www.scamwatcher.com", reliability_score=2, is_volatile=True), + + _base("phishtank_check", "threat_intel", + "https://checkurl.phishtank.com/checkurl/", "POST", + {"in_database": "$.results.in_database"}, + input_type="url", output_type=["url"], + payload_template={"url": "{target}", "format": "json"}, + tags=["passive", "threat"], + health_check_url="https://checkurl.phishtank.com", reliability_score=4), + + _base("duckduckgo_api", "search", + "https://api.duckduckgo.com/?q={target}&format=json", "GET", + {"abstract": "$.Abstract"}, + input_type="any", output_type=["url"], + tags=["passive", "fast"], + health_check_url="https://api.duckduckgo.com", reliability_score=5), + + 
_base("cve_search", "vulns", + "https://cve.circl.lu/api/cve/{target}", "GET", + {"summary": "$.summary"}, + input_type="cve", output_type=["cve"], + normalization_map={"summary": "vuln_description"}, + tags=["passive"], + health_check_url="https://cve.circl.lu", reliability_score=4), + + _base("cxsecurity", "vulns", + "https://cxsecurity.com/cvejson.php?cve={target}", "GET", + {"title": "$.title"}, + input_type="cve", output_type=["cve"], + tags=["passive"], + health_check_url="https://cxsecurity.com", reliability_score=3, is_volatile=True), + + _base("packetstorm", "vulns", + "https://packetstormsecurity.com/search/?q={target}", "GET", + {"results": "text_lines"}, + input_type="any", output_type=["url"], + tags=["passive"], + health_check_url="https://packetstormsecurity.com", reliability_score=4), + + _base("checkleaked", "breaches", + "https://api.checkleaked.cc/check/{target}", "GET", + {"found": "$.found"}, + input_type="email", output_type=["email"], + tags=["passive", "stealth"], + health_check_url="https://api.checkleaked.cc", reliability_score=2, is_volatile=True, + backup_endpoints=["https://checkleaked.cc/api/check/{target}"]), + + _base("scylla_sh_search", "breaches", + "https://scylla.sh/search?q={target}", "GET", + {"results": "$.*"}, + input_type="email", output_type=["email", "domain"], + tags=["passive", "stealth"], + health_check_url="https://scylla.sh", reliability_score=2, is_volatile=True, + backup_endpoints=["https://scylla.sh/api/search?q={target}"]), + + _base("vigilante_pw", "breaches", + "https://vigilante.pw/api/search?q={target}", "GET", + {"results": "$.results"}, + input_type="email", output_type=["email"], + tags=["passive", "stealth"], + health_check_url="https://vigilante.pw", reliability_score=2, is_volatile=True), +] + +# --------------------------------------------------------------------------- +# AUTHENTICATED / PREMIUM SOURCES +# --------------------------------------------------------------------------- + 
# Registry of sources that need credentials.  Every entry is produced by
# `_auth(...)`, which forwards `requires_auth=True`.  Credentials appear as
# "{UPPER_CASE}" placeholders inside the endpoint URL or the request headers,
# and each entry lists the placeholders it uses in `api_key_slots`.
# "{target}" marks where the queried value is inserted.
# NOTE(review): how `api_key_slots` is consumed (substitution and/or
# "is this source configured?" checks) is not visible here -- confirm in the
# config handler.  List order is kept exactly as declared.

AUTHENTICATED_PREMIUM_SOURCES: List[SourceConfig] = [

    # ── Scanners ─────────────────────────────────────────────────────────────

    _auth(
        "shodan_host", "scanners",
        "https://api.shodan.io/shodan/host/{target}?key={SHODAN_API_KEY}", "GET",
        {"ports": "$.ports", "vulns": "$.vulns"},
        api_key_slots=["{SHODAN_API_KEY}"],
        input_type="ip",
        output_type=["domain", "vuln"],
        normalization_map={"ports": "open_ports", "vulns": "cve_list"},
        tags=["passive", "infrastructure"],
        health_check_url="https://api.shodan.io",
        reliability_score=5,
    ),

    _auth(
        "shodan_search", "scanners",
        "https://api.shodan.io/shodan/host/search?key={SHODAN_API_KEY}&query={target}", "GET",
        {"ips": "$.matches[*].ip_str"},
        api_key_slots=["{SHODAN_API_KEY}"],
        input_type="domain",
        output_type=["ip"],
        normalization_map={"ip_str": "ip_address"},
        tags=["passive", "infrastructure"],
        health_check_url="https://api.shodan.io",
        reliability_score=5,
    ),

    _auth(
        "shodan_dns", "dns_recon",
        "https://api.shodan.io/dns/domain/{target}?key={SHODAN_API_KEY}", "GET",
        {"subdomains": "$.subdomains"},
        api_key_slots=["{SHODAN_API_KEY}"],
        input_type="domain",
        output_type=["domain"],
        tags=["passive", "infrastructure"],
        health_check_url="https://api.shodan.io",
        reliability_score=5,
    ),

    _auth(
        "shodan_exploits", "vulns",
        "https://exploits.shodan.io/api/search?query={target}&key={SHODAN_API_KEY}", "GET",
        {"total": "$.total"},
        api_key_slots=["{SHODAN_API_KEY}"],
        input_type="cve",
        output_type=["cve"],
        tags=["passive"],
        health_check_url="https://exploits.shodan.io",
        reliability_score=5,
    ),

    _auth(
        "censys_hosts", "scanners",
        "https://search.censys.io/api/v2/hosts/search?q={target}", "GET",
        {"results": "$.result.hits[*].ip"},
        headers={"Authorization": "Basic {CENSYS_AUTH_BASE64}"},
        api_key_slots=["{CENSYS_AUTH_BASE64}"],
        input_type="domain",
        output_type=["ip"],
        normalization_map={"ip": "ip_address"},
        tags=["passive", "infrastructure"],
        health_check_url="https://search.censys.io",
        reliability_score=5,
    ),

    _auth(
        "binaryedge_exposed", "scanners",
        "https://api.binaryedge.io/v2/query/ip/{target}", "GET",
        {"ports": "$.events[*].port"},
        headers={"X-Key": "{BINARYEDGE_API_KEY}"},
        api_key_slots=["{BINARYEDGE_API_KEY}"],
        input_type="ip",
        output_type=["ip"],
        normalization_map={"port": "open_port"},
        tags=["passive", "infrastructure"],
        health_check_url="https://api.binaryedge.io",
        reliability_score=4,
    ),

    _auth(
        "binaryedge_dns", "dns_recon",
        "https://api.binaryedge.io/v2/query/domains/subdomain/{target}", "GET",
        {"subs": "$.subs"},
        headers={"X-Key": "{BINARYEDGE_API_KEY}"},
        api_key_slots=["{BINARYEDGE_API_KEY}"],
        input_type="domain",
        output_type=["domain"],
        tags=["passive"],
        health_check_url="https://api.binaryedge.io",
        reliability_score=4,
    ),

    _auth(
        "zoomeye_host", "scanners",
        "https://api.zoomeye.org/host/search?query={target}", "GET",
        {"hosts": "$.matches[*].ip"},
        headers={"API-KEY": "{ZOOMEYE_API_KEY}"},
        api_key_slots=["{ZOOMEYE_API_KEY}"],
        input_type="domain",
        output_type=["ip"],
        tags=["passive", "infrastructure"],
        health_check_url="https://api.zoomeye.org",
        reliability_score=4,
    ),

    _auth(
        "fofa_info", "scanners",
        "https://fofa.info/api/v1/search/all?email={FOFA_EMAIL}&key={FOFA_API_KEY}&qbase64={target}", "GET",
        {"results": "$.results"},
        api_key_slots=["{FOFA_API_KEY}", "{FOFA_EMAIL}"],
        input_type="domain",
        output_type=["ip", "domain"],
        tags=["passive", "infrastructure"],
        health_check_url="https://fofa.info",
        reliability_score=4,
    ),

    _auth(
        "spyse_domain", "scanners",
        "https://api.spyse.com/v1/domain/details/{target}", "GET",
        {"asn": "$.data.asn"},
        headers={"Authorization": "Bearer {SPYSE_API_KEY}"},
        api_key_slots=["{SPYSE_API_KEY}"],
        input_type="domain",
        output_type=["ip"],
        tags=["passive"],
        health_check_url="https://api.spyse.com",
        reliability_score=3,
    ),

    _auth(
        "spyse_ip", "scanners",
        "https://api.spyse.com/v1/ip/details/{target}", "GET",
        {"geo": "$.data.geo"},
        headers={"Authorization": "Bearer {SPYSE_API_KEY}"},
        api_key_slots=["{SPYSE_API_KEY}"],
        input_type="ip",
        output_type=["ip"],
        tags=["passive"],
        health_check_url="https://api.spyse.com",
        reliability_score=3,
    ),

    _auth(
        "onyphe_datascan", "scanners",
        "https://www.onyphe.io/api/v2/simple/datascan/{target}", "GET",
        {"results": "$.results"},
        headers={"Authorization": "apikey {ONYPHE_API_KEY}"},
        api_key_slots=["{ONYPHE_API_KEY}"],
        input_type="ip",
        output_type=["ip", "domain"],
        tags=["passive", "infrastructure"],
        health_check_url="https://www.onyphe.io",
        reliability_score=4,
    ),

    _auth(
        "criminalip_asset", "scanners",
        "https://api.criminalip.io/v1/asset/ip/report?ip={target}", "GET",
        {"score": "$.score"},
        headers={"x-api-key": "{CRIMINALIP_API_KEY}"},
        api_key_slots=["{CRIMINALIP_API_KEY}"],
        input_type="ip",
        output_type=["ip"],
        normalization_map={"score": "risk_score"},
        tags=["passive", "threat"],
        health_check_url="https://api.criminalip.io",
        reliability_score=4,
    ),

    # ── Threat Intel ─────────────────────────────────────────────────────────

    _auth(
        "virustotal_domain", "threat_intel",
        "https://www.virustotal.com/api/v3/domains/{target}", "GET",
        {"malicious": "$.data.attributes.last_analysis_stats.malicious"},
        rate_limit=15.0,
        headers={"x-apikey": "{VIRUSTOTAL_API_KEY}"},
        api_key_slots=["{VIRUSTOTAL_API_KEY}"],
        input_type="domain",
        output_type=["domain"],
        normalization_map={"malicious": "malicious_count"},
        tags=["passive", "threat"],
        health_check_url="https://www.virustotal.com",
        reliability_score=5,
    ),

    _auth(
        "virustotal_ip", "threat_intel",
        "https://www.virustotal.com/api/v3/ip_addresses/{target}", "GET",
        {"reputation": "$.data.attributes.reputation"},
        rate_limit=15.0,
        headers={"x-apikey": "{VIRUSTOTAL_API_KEY}"},
        api_key_slots=["{VIRUSTOTAL_API_KEY}"],
        input_type="ip",
        output_type=["ip"],
        normalization_map={"reputation": "vt_reputation"},
        tags=["passive", "threat"],
        health_check_url="https://www.virustotal.com",
        reliability_score=5,
    ),

    _auth(
        "greynoise_community", "threat_intel",
        "https://api.greynoise.io/v3/community/{target}", "GET",
        {"noise": "$.noise", "classification": "$.classification"},
        headers={"key": "{GREYNOISE_API_KEY}"},
        api_key_slots=["{GREYNOISE_API_KEY}"],
        input_type="ip",
        output_type=["ip"],
        normalization_map={"noise": "is_noise", "classification": "threat_class"},
        tags=["passive", "threat"],
        health_check_url="https://api.greynoise.io",
        reliability_score=5,
    ),

    _auth(
        "abuseipdb", "threat_intel",
        "https://api.abuseipdb.com/api/v2/check?ipAddress={target}", "GET",
        {"score": "$.data.abuseConfidenceScore"},
        headers={"Key": "{ABUSEIPDB_API_KEY}"},
        api_key_slots=["{ABUSEIPDB_API_KEY}"],
        input_type="ip",
        output_type=["ip"],
        normalization_map={"abuseConfidenceScore": "abuse_score"},
        tags=["passive", "threat"],
        health_check_url="https://api.abuseipdb.com",
        reliability_score=5,
    ),

    _auth(
        "pulsedive_analyze", "threat_intel",
        "https://pulsedive.com/api/analyze.php?value={target}", "GET",
        {"risk": "$.risk"},
        headers={"key": "{PULSEDIVE_API_KEY}"},
        api_key_slots=["{PULSEDIVE_API_KEY}"],
        input_type="any",
        output_type=["ip", "domain"],
        normalization_map={"risk": "risk_level"},
        tags=["passive", "threat"],
        health_check_url="https://pulsedive.com",
        reliability_score=4,
    ),

    _auth(
        "metadefender_ip", "threat_intel",
        "https://api.metadefender.com/v4/ip/{target}", "GET",
        {"lookup": "$.lookup_results"},
        headers={"apikey": "{METADEFENDER_API_KEY}"},
        api_key_slots=["{METADEFENDER_API_KEY}"],
        input_type="ip",
        output_type=["ip"],
        tags=["passive", "threat"],
        health_check_url="https://api.metadefender.com",
        reliability_score=4,
    ),

    _auth(
        "recordedfuture_ip", "threat_intel",
        "https://api.recordedfuture.com/v2/ip/{target}", "GET",
        {"risk": "$.data.risk.score"},
        headers={"X-RFToken": "{RF_TOKEN}"},
        api_key_slots=["{RF_TOKEN}"],
        input_type="ip",
        output_type=["ip"],
        normalization_map={"score": "rf_risk_score"},
        tags=["passive", "threat"],
        health_check_url="https://api.recordedfuture.com",
        reliability_score=5,
    ),

    _auth(
        "vulners_search", "threat_intel",
        "https://vulners.com/api/v3/search/lucene/?query={target}", "GET",
        {"results": "$.data.search[*]._source.title"},
        headers={"X-Vulners-Api-Key": "{VULNERS_API_KEY}"},
        api_key_slots=["{VULNERS_API_KEY}"],
        input_type="cve",
        output_type=["cve"],
        tags=["passive"],
        health_check_url="https://vulners.com",
        reliability_score=4,
    ),

    _auth(
        "urlvoid", "threat_intel",
        "https://api.urlvoid.com/api1000/{URLVOID_API_KEY}/host/{target}", "GET",
        {"detections": "$.detections"},
        api_key_slots=["{URLVOID_API_KEY}"],
        input_type="domain",
        output_type=["domain"],
        tags=["passive", "threat"],
        health_check_url="https://api.urlvoid.com",
        reliability_score=4,
    ),

    _auth(
        "fraudlabspro", "threat_intel",
        "https://api.fraudlabspro.com/v1/ip/check?key={FLP_API_KEY}&ip={target}", "GET",
        {"fraud": "$.fraudlabspro_score"},
        api_key_slots=["{FLP_API_KEY}"],
        input_type="ip",
        output_type=["ip"],
        normalization_map={"fraudlabspro_score": "fraud_score"},
        tags=["passive", "threat"],
        health_check_url="https://api.fraudlabspro.com",
        reliability_score=4,
    ),

    _auth(
        "google_safebrowsing", "threat_intel",
        "https://safebrowsing.googleapis.com/v4/threatMatches:find?key={GOOGLE_API_KEY}", "POST",
        {"matches": "$.matches"},
        api_key_slots=["{GOOGLE_API_KEY}"],
        input_type="url",
        output_type=["url"],
        payload_template={
            "client": {"clientId": "nox", "clientVersion": "1.0"},
            "threatInfo": {
                "threatTypes": ["MALWARE", "SOCIAL_ENGINEERING"],
                "platformTypes": ["ANY_PLATFORM"],
                "threatEntryTypes": ["URL"],
                "threatEntries": [{"url": "{target}"}],
            },
        },
        tags=["passive", "threat"],
        health_check_url="https://safebrowsing.googleapis.com",
        reliability_score=5,
    ),

    _auth(
        "threatconnect_search", "threat_intel",
        "https://api.threatconnect.com/v2/indicators/{target}", "GET",
        {"data": "$.data"},
        headers={"Authorization": "TC {TC_API_KEY}:{TC_SIGNATURE}"},
        # Both placeholders used in the Authorization header are declared,
        # mirroring fofa_info, which lists every placeholder it uses.
        # NOTE(review): ThreatConnect signatures are normally derived per
        # request -- confirm that a statically configured value is viable.
        api_key_slots=["{TC_API_KEY}", "{TC_SIGNATURE}"],
        input_type="any",
        output_type=["ip", "domain"],
        tags=["passive", "threat"],
        health_check_url="https://api.threatconnect.com",
        reliability_score=4,
    ),

    _auth(
        "threatportal", "threat_intel",
        "https://threatportal.io/api/v1/search?q={target}", "GET",
        {"results": "$.results"},
        headers={"Authorization": "Bearer {TP_API_KEY}"},
        api_key_slots=["{TP_API_KEY}"],
        input_type="any",
        output_type=["ip", "domain"],
        tags=["passive", "threat"],
        health_check_url="https://threatportal.io",
        reliability_score=3,
        is_volatile=True,
    ),

    _auth(
        "malshare", "threat_intel",
        "https://malshare.com/api.php?api_key={MALSHARE_API_KEY}&action=search&query={target}", "GET",
        {"hashes": "$.*"},
        api_key_slots=["{MALSHARE_API_KEY}"],
        input_type="hash",
        output_type=["hash"],
        tags=["passive", "threat"],
        health_check_url="https://malshare.com",
        reliability_score=3,
    ),

    _auth(
        "hybrid_analysis", "threat_intel",
        "https://www.hybrid-analysis.com/api/v2/search/hash", "POST",
        {"verdict": "$.verdict"},
        headers={"api-key": "{HYBRID_API_KEY}"},
        api_key_slots=["{HYBRID_API_KEY}"],
        input_type="hash",
        output_type=["hash"],
        payload_template={"hash": "{target}"},
        normalization_map={"verdict": "malware_verdict"},
        tags=["passive", "threat", "heavy"],
        health_check_url="https://www.hybrid-analysis.com",
        reliability_score=4,
    ),

    _auth(
        "joesandbox", "threat_intel",
        "https://www.joesandbox.com/api/v2/analysis/search?q={target}", "GET",
        {"id": "$.[*].id"},
        headers={"X-JoeSandbox-Api-Key": "{JOE_API_KEY}"},
        api_key_slots=["{JOE_API_KEY}"],
        input_type="hash",
        output_type=["hash"],
        tags=["passive", "threat", "heavy"],
        health_check_url="https://www.joesandbox.com",
        reliability_score=4,
    ),

    _auth(
        "anyrun", "threat_intel",
        "https://api.any.run/v1/analysis?hash={target}", "GET",
        {"tasks": "$.tasks"},
        headers={"Authorization": "API-Key {ANYRUN_API_KEY}"},
        api_key_slots=["{ANYRUN_API_KEY}"],
        input_type="hash",
        output_type=["hash"],
        tags=["passive", "threat", "heavy"],
        health_check_url="https://api.any.run",
        reliability_score=4,
    ),

    _auth(
        "intezer", "threat_intel",
        "https://analyze.intezer.com/api/v2-0/get-analysis-by-hash/{target}", "GET",
        {"result": "$.result"},
        headers={"Authorization": "Bearer {INTEZER_API_KEY}"},
        api_key_slots=["{INTEZER_API_KEY}"],
        input_type="hash",
        output_type=["hash"],
        tags=["passive", "threat"],
        health_check_url="https://analyze.intezer.com",
        reliability_score=4,
    ),

    _auth(
        "misp_search", "threat_intel",
        "{MISP_URL}/attributes/restSearch", "POST",
        {"attributes": "$.Attribute[*].value"},
        headers={"Authorization": "{MISP_API_KEY}", "Content-Type": "application/json"},
        # The instance URL is a user-supplied placeholder just like the key
        # (it appears in the endpoint and in health_check_url), so it is
        # declared alongside it -- same convention as fofa_info.
        api_key_slots=["{MISP_API_KEY}", "{MISP_URL}"],
        input_type="any",
        output_type=["ip", "domain", "hash"],
        payload_template={"returnFormat": "json", "value": "{target}"},
        tags=["passive", "threat"],
        health_check_url="{MISP_URL}",
        reliability_score=4,
    ),
]
"username", "ip"], + normalization_map={"email": "email_address", "username": "username", + "password": "plaintext_password", "hashed_password": "password_hash", + "ip_address": "ip_address", "name": "full_name"}, + tags=["passive", "stealth"], + health_check_url="https://api.dehashed.com", reliability_score=5), + + _auth("snusbase", "breaches", + "https://api.snusbase.com/data/search", "POST", + {"leaks": "$.results"}, + headers={"Auth": "{SNUSBASE_API_KEY}", "Content-Type": "application/json"}, + api_key_slots=["{SNUSBASE_API_KEY}"], + input_type="email", output_type=["email", "username"], + payload_template={"terms": ["{target}"], "types": ["email"]}, + normalization_map={"email": "email_address", "username": "username", + "password": "plaintext_password", "hash": "password_hash"}, + tags=["passive", "stealth"], + health_check_url="https://api.snusbase.com", reliability_score=4), + + _auth("intelx_search", "breaches", + "https://2.intelx.io/intelligent/search", "POST", + {"id": "$.id"}, + headers={"x-key": "{INTELX_API_KEY}"}, + api_key_slots=["{INTELX_API_KEY}"], + input_type="email", output_type=["email", "domain"], + payload_template={"term": "{target}", "buckets": [], "lookuplevel": 0, + "maxresults": 100, "timeout": 0, "datefrom": "", "dateto": "", + "sort": 4, "media": 0, "terminate": []}, + tags=["passive", "stealth"], + health_check_url="https://2.intelx.io", reliability_score=5), + + _auth("intelx_phone", "breaches", + "https://2.intelx.io/phone/search?phone={target}", "GET", + {"results": "$.results"}, + headers={"x-key": "{INTELX_API_KEY}"}, + api_key_slots=["{INTELX_API_KEY}"], + input_type="phone", output_type=["phone"], + tags=["passive"], + health_check_url="https://2.intelx.io", reliability_score=5), + + _auth("leakcheck", "breaches", + "https://leakcheck.io/api/v2/query/{target}", "GET", + {"sources": "$.sources"}, + headers={"X-API-Key": "{LEAKCHECK_API_KEY}"}, + api_key_slots=["{LEAKCHECK_API_KEY}"], + input_type="email", 
output_type=["email"], + normalization_map={"sources": "breach_sources"}, + tags=["passive", "stealth"], + health_check_url="https://leakcheck.io", reliability_score=4), + + _auth("spycloud_breach", "breaches", + "https://api.spycloud.io/enterprise-v2/breach/data/emails/{target}", "GET", + {"results": "$.results"}, + headers={"X-API-Key": "{SPYCLOUD_API_KEY}"}, + api_key_slots=["{SPYCLOUD_API_KEY}"], + input_type="email", output_type=["email", "username", "ip"], + normalization_map={"email": "email_address", "username": "username", + "password": "plaintext_password", "ip_addresses": "ip_address"}, + tags=["passive", "stealth"], + health_check_url="https://api.spycloud.io", reliability_score=5), + + _auth("leakix_search", "breaches", + "https://leakix.net/api/search?q={target}", "GET", + {"leaks": "$.[*].event_source"}, + headers={"api-key": "{LEAKIX_API_KEY}"}, + api_key_slots=["{LEAKIX_API_KEY}"], + input_type="domain", output_type=["domain", "ip"], + tags=["passive"], + health_check_url="https://leakix.net", reliability_score=4), + + _auth("breachdirectory", "breaches", + "https://breachdirectory.com/api/search?key={BD_API_KEY}&email={target}", "GET", + {"found": "$.found"}, + api_key_slots=["{BD_API_KEY}"], + input_type="email", output_type=["email"], + tags=["passive", "stealth"], + health_check_url="https://breachdirectory.com", reliability_score=4), + + _auth("breachaware", "breaches", + "https://api.breachaware.com/v1/search?query={target}", "GET", + {"breaches": "$.breaches"}, + headers={"X-API-KEY": "{BA_API_KEY}"}, + api_key_slots=["{BA_API_KEY}"], + input_type="email", output_type=["email"], + tags=["passive", "stealth"], + health_check_url="https://api.breachaware.com", reliability_score=3, is_volatile=True), + + _auth("tines_breach", "breaches", + "https://api.tines.com/breaches/{target}", "GET", + {"breaches": "$.breaches"}, + headers={"Authorization": "Bearer {TINES_API_KEY}"}, + api_key_slots=["{TINES_API_KEY}"], + input_type="email", 
output_type=["email"], + tags=["passive"], + health_check_url="https://api.tines.com", reliability_score=3), + + _auth("leakstats_pw", "breaches", + "https://leakstats.net/api/password/{target}", "GET", + {"count": "$.count"}, + headers={"api-key": "{LEAKSTATS_API_KEY}"}, + api_key_slots=["{LEAKSTATS_API_KEY}"], + input_type="hash", output_type=["hash"], + tags=["passive"], + health_check_url="https://leakstats.net", reliability_score=3, is_volatile=True), + + _base("leak_lookup", "breaches", + "https://leak-lookup.com/api/search", "POST", + {"results": "$.message"}, + input_type="email", output_type=["email"], + payload_template={"query": "{target}", "type": "email_address"}, + tags=["passive", "stealth"], + health_check_url="https://leak-lookup.com", reliability_score=3, is_volatile=True), + + _auth("cit0day", "breaches", + "https://cit0day.in/api/v1/search?query={target}", "GET", + {"results": "$.results"}, + headers={"Authorization": "Bearer {CIT0DAY_API_KEY}"}, + api_key_slots=["{CIT0DAY_API_KEY}"], + input_type="email", output_type=["email"], + tags=["passive", "stealth"], + health_check_url="https://cit0day.in", reliability_score=2, is_volatile=True), + + # ── DNS Recon ───────────────────────────────────────────────────────────── + + _auth("securitytrails_sub", "dns_recon", + "https://api.securitytrails.com/v1/domain/{target}/subdomains", "GET", + {"subdomains": "$.subdomains"}, + headers={"APIKEY": "{SECURITYTRAILS_API_KEY}"}, + api_key_slots=["{SECURITYTRAILS_API_KEY}"], + input_type="domain", output_type=["domain"], + tags=["passive"], + health_check_url="https://api.securitytrails.com", reliability_score=5), + + _auth("securitytrails_history", "dns_recon", + "https://api.securitytrails.com/v1/history/{target}/dns/a", "GET", + {"history": "$.records[*].values[*].ip"}, + headers={"APIKEY": "{SECURITYTRAILS_API_KEY}"}, + api_key_slots=["{SECURITYTRAILS_API_KEY}"], + input_type="domain", output_type=["ip"], + normalization_map={"ip": "historical_ip"}, + 
tags=["passive"], + health_check_url="https://api.securitytrails.com", reliability_score=5), + + _auth("circl_lu_pdns", "dns_recon", + "https://www.circl.lu/pdns/query/{target}", "GET", + {"resolutions": "$.[*].rdata"}, + headers={"Authorization": "Basic {CIRCL_AUTH_BASE64}"}, + api_key_slots=["{CIRCL_AUTH_BASE64}"], + input_type="domain", output_type=["ip"], + tags=["passive"], + health_check_url="https://www.circl.lu", reliability_score=4), + + _auth("viewdns_reverse_ip", "dns_recon", + "https://api.viewdns.info/reverseip/?host={target}&apikey={VIEWDNS_API_KEY}&output=json", "GET", + {"domains": "$.response.domains[*].name"}, + api_key_slots=["{VIEWDNS_API_KEY}"], + input_type="ip", output_type=["domain"], + tags=["passive"], + health_check_url="https://api.viewdns.info", reliability_score=4), + + _auth("dnsdb_pdns", "dns_recon", + "https://api.dnsdb.info/lookup/rrset/name/{target}", "GET", + {"rdata": "$.[*].rdata"}, + headers={"X-API-Key": "{DNSDB_API_KEY}"}, + api_key_slots=["{DNSDB_API_KEY}"], + input_type="domain", output_type=["ip"], + tags=["passive"], + health_check_url="https://api.dnsdb.info", reliability_score=5), + + _auth("spyonweb", "dns_recon", + "https://api.spyonweb.com/v1/summary/{target}?access_token={SPYONWEB_API_KEY}", "GET", + {"adsense": "$.result.adsense"}, + api_key_slots=["{SPYONWEB_API_KEY}"], + input_type="domain", output_type=["domain"], + tags=["passive"], + health_check_url="https://api.spyonweb.com", reliability_score=3), + + # ── WHOIS ───────────────────────────────────────────────────────────────── + + _auth("passivetotal_whois", "whois", + "https://api.passivetotal.org/v2/whois?query={target}", "GET", + {"registrar": "$.registrar"}, + headers={"Authorization": "Basic {PASSIVETOTAL_AUTH_BASE64}"}, + api_key_slots=["{PASSIVETOTAL_AUTH_BASE64}"], + input_type="domain", output_type=["email", "domain"], + normalization_map={"registrar": "registrar_name"}, + tags=["passive"], + health_check_url="https://api.passivetotal.org", 
reliability_score=4), + + _auth("whoisxml_api", "whois", + "https://www.whoisxmlapi.com/whoisserver/WhoisService?apiKey={WHOISXML_API_KEY}&domainName={target}&outputFormat=JSON", "GET", + {"created": "$.WhoisRecord.createdDate"}, + api_key_slots=["{WHOISXML_API_KEY}"], + input_type="domain", output_type=["email", "domain"], + normalization_map={"createdDate": "registration_date"}, + tags=["passive"], + health_check_url="https://www.whoisxmlapi.com", reliability_score=5), + + _auth("whoxy_whois", "whois", + "https://api.whoxy.com/?key={WHOXY_API_KEY}&whois={target}", "GET", + {"registrar": "$.registrar_name"}, + api_key_slots=["{WHOXY_API_KEY}"], + input_type="domain", output_type=["email", "domain"], + tags=["passive"], + health_check_url="https://api.whoxy.com", reliability_score=4), + + _auth("whois_freaks", "whois", + "https://whoisfreaks.com/api/v1/whois?apiKey={WF_API_KEY}&whois=live&domainName={target}", "GET", + {"emails": "$.whois_record.registrant_contact.email_address"}, + api_key_slots=["{WF_API_KEY}"], + input_type="domain", output_type=["email"], + tags=["passive"], + health_check_url="https://whoisfreaks.com", reliability_score=4), + + _auth("domaintools_whois", "whois", + "https://api.domaintools.com/v1/{target}/whois/", "GET", + {"whois": "$.response.whois.record"}, + headers={"Authorization": "Basic {DT_AUTH_BASE64}"}, + api_key_slots=["{DT_AUTH_BASE64}"], + input_type="domain", output_type=["email", "domain"], + tags=["passive"], + health_check_url="https://api.domaintools.com", reliability_score=5), + + # ── Enrichment ──────────────────────────────────────────────────────────── + + _auth("clearbit_enrich", "enrichment", + "https://person.clearbit.com/v2/people/find?email={target}", "GET", + {"full_name": "$.name.fullName"}, + headers={"Authorization": "Bearer {CLEARBIT_API_KEY}"}, + api_key_slots=["{CLEARBIT_API_KEY}"], + input_type="email", output_type=["username", "domain"], + normalization_map={"fullName": "full_name"}, + tags=["passive"], + 
health_check_url="https://person.clearbit.com", reliability_score=4), + + _auth("fullcontact", "enrichment", + "https://api.fullcontact.com/v3/person.enrich", "POST", + {"social": "$.socialProfiles"}, + headers={"Authorization": "Bearer {FULLCONTACT_API_KEY}"}, + api_key_slots=["{FULLCONTACT_API_KEY}"], + input_type="email", output_type=["username", "domain"], + payload_template={"email": "{target}"}, + tags=["passive"], + health_check_url="https://api.fullcontact.com", reliability_score=4), + + _auth("passivetotal_enrich", "enrichment", + "https://api.passivetotal.org/v2/enrichment?query={target}", "GET", + {"tags": "$.tags"}, + headers={"Authorization": "Basic {PASSIVETOTAL_AUTH_BASE64}"}, + api_key_slots=["{PASSIVETOTAL_AUTH_BASE64}"], + input_type="domain", output_type=["domain"], + tags=["passive"], + health_check_url="https://api.passivetotal.org", reliability_score=4), + + _auth("pipl_search", "enrichment", + "https://api.pipl.com/search/?email={target}&key={PIPL_API_KEY}", "GET", + {"person": "$.person"}, + api_key_slots=["{PIPL_API_KEY}"], + input_type="email", output_type=["username", "domain", "phone"], + tags=["passive"], + health_check_url="https://api.pipl.com", reliability_score=4), + + # ── Email Reputation ────────────────────────────────────────────────────── + + _auth("ipqualityscore_email", "email_rep", + "https://ipqualityscore.com/api/json/email/{IPQS_API_KEY}/{target}", "GET", + {"fraud_score": "$.fraud_score"}, + api_key_slots=["{IPQS_API_KEY}"], + input_type="email", output_type=["email"], + normalization_map={"fraud_score": "email_fraud_score"}, + tags=["passive", "fast"], + health_check_url="https://ipqualityscore.com", reliability_score=4), + + _auth("emailhippo", "email_rep", + "https://api.emailhippo.com/v3/verify?apiKey={HIPPO_API_KEY}&email={target}", "GET", + {"status": "$.meta.status"}, + api_key_slots=["{HIPPO_API_KEY}"], + input_type="email", output_type=["email"], + tags=["passive", "fast"], + 
health_check_url="https://api.emailhippo.com", reliability_score=4), + + _auth("zerobounce", "email_rep", + "https://api.zerobounce.net/v2/validate?api_key={ZEROBOUNCE_API_KEY}&email={target}", "GET", + {"status": "$.status"}, + api_key_slots=["{ZEROBOUNCE_API_KEY}"], + input_type="email", output_type=["email"], + normalization_map={"status": "email_validity"}, + tags=["passive", "fast"], + health_check_url="https://api.zerobounce.net", reliability_score=4), + + _auth("hunter_verify", "email_rep", + "https://api.hunter.io/v2/email-verifier?email={target}&api_key={HUNTER_API_KEY}", "GET", + {"result": "$.data.result"}, + api_key_slots=["{HUNTER_API_KEY}"], + input_type="email", output_type=["email"], + tags=["passive", "fast"], + health_check_url="https://api.hunter.io", reliability_score=4), + + _auth("mailboxlayer", "email_rep", + "http://apilayer.net/api/check?access_key={MAILBOX_API_KEY}&email={target}", "GET", + {"score": "$.score"}, + api_key_slots=["{MAILBOX_API_KEY}"], + input_type="email", output_type=["email"], + tags=["passive"], + health_check_url="http://apilayer.net", reliability_score=3), + + _auth("abstract_email", "email_rep", + "https://emailvalidation.abstractapi.com/v1/?api_key={ABSTRACT_API_KEY}&email={target}", "GET", + {"quality": "$.quality_score"}, + api_key_slots=["{ABSTRACT_API_KEY}"], + input_type="email", output_type=["email"], + tags=["passive", "fast"], + health_check_url="https://emailvalidation.abstractapi.com", reliability_score=4), + + # ── Discovery / Social ──────────────────────────────────────────────────── + + _auth("hunter_io", "discovery", + "https://api.hunter.io/v2/domain-search?domain={target}&api_key={HUNTER_API_KEY}", "GET", + {"emails": "$.data.emails[*].value"}, + api_key_slots=["{HUNTER_API_KEY}"], + input_type="domain", output_type=["email"], + normalization_map={"value": "email_address"}, + tags=["passive"], + health_check_url="https://api.hunter.io", reliability_score=5), + + _auth("twitter_v2", "social", + 
"https://api.twitter.com/2/users/by/username/{target}", "GET", + {"id": "$.data.id"}, + headers={"Authorization": "Bearer {TWITTER_BEARER_TOKEN}"}, + api_key_slots=["{TWITTER_BEARER_TOKEN}"], + input_type="username", output_type=["username"], + tags=["passive"], + health_check_url="https://api.twitter.com", reliability_score=4), + + _auth("github_code_search", "code", + "https://api.github.com/search/code?q={target}", "GET", + {"urls": "$.items[*].html_url"}, + headers={"Authorization": "token {GITHUB_TOKEN}"}, + api_key_slots=["{GITHUB_TOKEN}"], + input_type="any", output_type=["url"], + tags=["passive"], + health_check_url="https://api.github.com", reliability_score=5), + + _auth("github_search_repos", "social", + "https://api.github.com/search/repositories?q={target}", "GET", + {"total": "$.total_count"}, + headers={"Authorization": "token {GITHUB_TOKEN}"}, + api_key_slots=["{GITHUB_TOKEN}"], + input_type="username", output_type=["username"], + tags=["passive"], + health_check_url="https://api.github.com", reliability_score=5), + + # ── Geolocation ─────────────────────────────────────────────────────────── + + _auth("ipstack", "geolocation", + "http://api.ipstack.com/{target}?access_key={IPSTACK_API_KEY}", "GET", + {"country": "$.country_name"}, + api_key_slots=["{IPSTACK_API_KEY}"], + input_type="ip", output_type=["ip"], + normalization_map={"country_name": "geo_country"}, + tags=["passive", "fast"], + health_check_url="http://api.ipstack.com", reliability_score=4), + + _auth("ipgeolocation_io", "geolocation", + "https://api.ipgeolocation.io/ipgeo?apiKey={IPGEO_API_KEY}&ip={target}", "GET", + {"isp": "$.isp"}, + api_key_slots=["{IPGEO_API_KEY}"], + input_type="ip", output_type=["ip"], + normalization_map={"isp": "asn_org"}, + tags=["passive", "fast"], + health_check_url="https://api.ipgeolocation.io", reliability_score=4), + + _auth("ipdata_co", "geolocation", + "https://api.ipdata.co/{target}?api-key={IPDATA_API_KEY}", "GET", + {"threat": "$.threat"}, + 
api_key_slots=["{IPDATA_API_KEY}"], + input_type="ip", output_type=["ip"], + normalization_map={"threat": "threat_info"}, + tags=["passive", "fast"], + health_check_url="https://api.ipdata.co", reliability_score=4), + + _auth("extreme_ip_lookup", "geolocation", + "https://extreme-ip-lookup.com/json/{target}?key={EXTREME_API_KEY}", "GET", + {"org": "$.org"}, + api_key_slots=["{EXTREME_API_KEY}"], + input_type="ip", output_type=["ip"], + tags=["passive"], + health_check_url="https://extreme-ip-lookup.com", reliability_score=3), + + _auth("ipinfodb", "geolocation", + "http://api.ipinfodb.com/v3/ip-city/?key={IPINFODB_API_KEY}&ip={target}&format=json", "GET", + {"city": "$.cityName"}, + api_key_slots=["{IPINFODB_API_KEY}"], + input_type="ip", output_type=["ip"], + normalization_map={"cityName": "geo_city"}, + tags=["passive"], + health_check_url="http://api.ipinfodb.com", reliability_score=3), + + # ── Phone ───────────────────────────────────────────────────────────────── + + _auth("numverify", "phone", + "http://apilayer.net/api/validate?access_key={NUMVERIFY_API_KEY}&number={target}", "GET", + {"valid": "$.valid", "carrier": "$.carrier"}, + api_key_slots=["{NUMVERIFY_API_KEY}"], + input_type="phone", output_type=["phone"], + normalization_map={"valid": "phone_valid", "carrier": "phone_carrier"}, + tags=["passive"], + health_check_url="http://apilayer.net", reliability_score=4), + + # ── Hashes ──────────────────────────────────────────────────────────────── + + _auth("hashes_org", "hashes", + "https://hashes.org/api.php?key={HASHES_API_KEY}&query={target}", "GET", + {"found": "$.results"}, + api_key_slots=["{HASHES_API_KEY}"], + input_type="hash", output_type=["hash"], + tags=["passive"], + health_check_url="https://hashes.org", reliability_score=3), + + # ── Search ──────────────────────────────────────────────────────────────── + + _auth("google_search_custom", "search", + 
def build_nox_sources(output_dir: str = None) -> None:
    """Write one JSON plugin file per configured source, then tidy the directory.

    Steps, in order:
      1. Serialise every entry of FREE_PUBLIC_SOURCES and
         AUTHENTICATED_PREMIUM_SOURCES to ``<output_dir>/<name>.json``.
         Each file is written to a temp file first and moved into place with
         ``os.replace`` so a partially written plugin is never visible.
      2. Delete any ``*.json`` in ``output_dir`` that this build did not
         produce (orphaned plugins).
      3. Seed the API key template, but only if it does not exist yet.

    Args:
        output_dir: Target directory. Defaults to the ``sources`` directory
            next to this script.

    A failure on a single source is recorded and reported; it never aborts
    the remaining sources.
    """
    # H3: resolve output_dir relative to this script's location, not CWD.
    # This ensures `python /opt/nox-cli/build_sources.py` from any directory
    # always writes to /opt/nox-cli/sources/ instead of ./sources/.
    if output_dir is None:
        output_dir = str(Path(__file__).resolve().parent / "sources")
    os.makedirs(output_dir, exist_ok=True)

    all_sources: List[SourceConfig] = FREE_PUBLIC_SOURCES + AUTHENTICATED_PREMIUM_SOURCES
    errors: List[str] = []
    written = 0

    for src in all_sources:
        dest = os.path.join(output_dir, f"{src.name}.json")
        tmp_path = None
        try:
            # Serialisation lives inside the try so that one malformed source
            # is reported like any other failure instead of killing the build.
            payload = src.to_json()
            tmp_fd, tmp_path = tempfile.mkstemp(dir=output_dir, suffix=".tmp")
            with os.fdopen(tmp_fd, "w", encoding="utf-8") as fh:
                fh.write(payload)
            # mkstemp() creates the file as 0600; without this the published
            # plugin would be unreadable for every user except the builder.
            os.chmod(tmp_path, 0o644)
            os.replace(tmp_path, dest)
            written += 1
        except Exception as exc:
            if tmp_path is not None:
                # Best-effort cleanup: a failing unlink must not abort the loop.
                try:
                    os.unlink(tmp_path)
                except OSError:
                    pass
            errors.append(f"[ERROR] '{src.name}': {exc}")

    print(f"Done. {written}/{len(all_sources)} source files written to '{output_dir}/'.")
    if errors:
        print("\nErrors:")
        for e in errors:
            print(" ", e)

    # -- Remove orphaned JSON plugins (§2.3) --------------------------------
    # Any .json in sources/ not produced by this build is stale and would
    # be silently loaded at runtime by SourceOrchestrator. Remove it.
    expected_filenames = {f"{src.name}.json" for src in all_sources}
    removed = 0
    for fname in os.listdir(output_dir):
        if not fname.endswith(".json"):
            continue
        fpath = os.path.join(output_dir, fname)
        if os.path.isfile(fpath) and fname not in expected_filenames:
            try:
                os.unlink(fpath)
                print(f"[cleanup] Removed orphaned plugin: {fname}")
                removed += 1
            except OSError as exc:
                print(f"[WARN] Could not remove orphaned plugin {fname}: {exc}")
    if removed:
        print(f"[cleanup] {removed} orphaned plugin(s) removed.")

    # -- Seed apikeys.json (never overwrites an existing file) --------------
    # The three helpers are checked against None before use, so a build
    # without them simply skips seeding.
    if _APIKEYS_FILE is not None and _default_store is not None and _write_store is not None:
        if not _APIKEYS_FILE.exists():
            try:
                _write_store(_default_store())
                print(f"Created API key template: {_APIKEYS_FILE}")
                print(f" All {len(_default_store())} private keys set to '{UNIVERSAL_PLACEHOLDER}'.")
                print(" Edit that file to configure your keys before scanning.")
            except Exception as exc:
                print(f"[WARN] Could not create apikeys.json: {exc}")
        else:
            print(f"API key file already exists — not overwritten: {_APIKEYS_FILE}")
+ + -- nox-project Thu, 02 Apr 2026 20:00:00 +0200 diff --git a/debian/control b/debian/control new file mode 100644 index 0000000..27a2e79 --- /dev/null +++ b/debian/control @@ -0,0 +1,35 @@ +Source: nox-cli +Section: net +Priority: optional +Maintainer: nox-project +Build-Depends: debhelper-compat (= 13), dh-python, python3-all, python3-setuptools +Standards-Version: 4.6.2 +Rules-Requires-Root: no +Homepage: https://github.com/nox-project/nox-framework + +Package: nox-cli +Architecture: all +Depends: ${python3:Depends}, ${misc:Depends}, + python3-requests, + python3-aiohttp, + python3-pydantic, + python3-colorama, + python3-rich, + python3-bs4, + python3-lxml, + python3-dnspython, + python3-phonenumbers, + python3-aiosqlite | python3-pip +Recommends: + python3-stem, + tor +Description: Advanced Asynchronous Cyber Threat Intelligence Framework. + nox-cli is an open-source OSINT and breach intelligence framework + supporting 120+ JSON-plugin data sources. It performs asynchronous + multi-source lookups against email addresses, domains, IP addresses, + usernames, phone numbers, and hashes. Features include recursive + identity pivoting, risk scoring, HVT detection, dorking, scraping, + hash cracking, and forensic PDF reporting. + . + All Python dependencies are installed inside an isolated virtual + environment at /opt/nox-cli/.venv (PEP 668 compliant). diff --git a/debian/copyright b/debian/copyright new file mode 100644 index 0000000..d68252a --- /dev/null +++ b/debian/copyright @@ -0,0 +1,24 @@ +Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-Name: nox-cli +Upstream-Contact: nox-project +Source: https://github.com/nox-project/nox-framework + +Files: * +Copyright: 2024-2026 nox-project +License: Apache-2.0 + +License: Apache-2.0 + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + . 
+ https://www.apache.org/licenses/LICENSE-2.0 + . + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + . + On Debian systems, the full text of the Apache License, Version 2.0 + can be found in the file /usr/share/common-licenses/Apache-2.0. diff --git a/debian/install b/debian/install new file mode 100644 index 0000000..63a1e2c --- /dev/null +++ b/debian/install @@ -0,0 +1 @@ +sources/*.json usr/share/nox-cli/sources diff --git a/debian/postrm b/debian/postrm new file mode 100755 index 0000000..be05932 --- /dev/null +++ b/debian/postrm @@ -0,0 +1,11 @@ +#!/bin/sh +set -e +if [ "$1" = "purge" ]; then + rm -rf /opt/nox-cli + rm -rf /root/.nox /root/.config/nox-cli + # Also clean for the invoking user if SUDO_USER is set + if [ -n "$SUDO_USER" ]; then + UHOME=$(getent passwd "$SUDO_USER" | cut -d: -f6) + rm -rf "${UHOME}/.nox" "${UHOME}/.config/nox-cli" + fi +fi diff --git a/debian/prerm b/debian/prerm new file mode 100755 index 0000000..4ff2076 --- /dev/null +++ b/debian/prerm @@ -0,0 +1,3 @@ +#!/bin/sh +set -e +rm -f /usr/bin/nox-cli diff --git a/debian/rules b/debian/rules new file mode 100755 index 0000000..5aa9c88 --- /dev/null +++ b/debian/rules @@ -0,0 +1,3 @@ +#!/usr/bin/make -f +%: + dh $@ --with python3 --buildsystem=pybuild diff --git a/debian/tests/control b/debian/tests/control new file mode 100644 index 0000000..353dac6 --- /dev/null +++ b/debian/tests/control @@ -0,0 +1,3 @@ +Tests: smoke +Depends: @ +Restrictions: allow-stderr diff --git a/debian/tests/smoke b/debian/tests/smoke new file mode 100755 index 0000000..3a914ff --- /dev/null +++ b/debian/tests/smoke @@ -0,0 +1,5 @@ +#!/bin/sh +set -e +nox-cli --help > /dev/null +nox-cli --version > /dev/null +echo "smoke: OK" diff --git 
a/docs/nox-cli.1 b/docs/nox-cli.1 new file mode 100644 index 0000000..b28f3ae --- /dev/null +++ b/docs/nox-cli.1 @@ -0,0 +1,109 @@ +.TH NOX\-CLI 1 "2026-03-30" "1.0.0" "NOX Framework" +.SH NAME +nox-cli \- Advanced Asynchronous Cyber Threat Intelligence Framework +.SH SYNOPSIS +.B nox-cli +[\fIOPTIONS\fR] +.SH DESCRIPTION +.B nox-cli +is an open-source OSINT and breach intelligence framework supporting 120+ +JSON-plugin data sources. It performs asynchronous multi-source lookups +against email addresses, domains, IP addresses, usernames, phone numbers, +and hashes. Results can be exported in JSON, CSV, HTML, Markdown, or PDF. +.PP +On first run, the application creates \fI~/.nox/\fR with a default +\fIconfig.ini\fR and seeds the sources directory from the package data. +.SH OPTIONS +.TP +.BR \-t ", " \-\-target " " \fITARGET\fR +Target to scan (email, domain, IP, username, phone, or hash). +.TP +.BR \-i ", " \-\-interactive +Launch the interactive REPL shell. +.TP +.BR \-\-version +Print version number and exit. +.TP +.BR \-\-autoscan +Full pipeline: breach scan + recursive identity pivot + dorking + paste/Telegram scraping. +Equivalent to running all phases in sequence on the target and every discovered asset. +.TP +.BR \-\-fullscan +Full scan including pivot enrichment, dorking, and scraping (alias for \-\-autoscan). +.TP +.BR \-\-no\-pivot +Disable recursive pivot enrichment during a full scan. +.TP +.BR \-\-dork " " \fITARGET\fR +Run Google dorking against the specified target. +.TP +.BR \-\-scrape " " \fITARGET\fR +Run web scraping and Telegram indexing against the specified target. +.TP +.BR \-\-crack " " \fIHASH\fR +Attempt to crack the given hash using online rainbow-table APIs and local wordlists. +.B WARNING: +submitting hashes to online APIs leaks them to third-party services. +Use \fB\-\-no\-online\-crack\fR to restrict cracking to local wordlists only. +.TP +.BR \-\-no\-online\-crack +Disable all online rainbow-table API queries during hash cracking. 
+Only local wordlist-based cracking is performed. No hash data is sent to +external services. Recommended for sensitive engagements. +.TP +.BR \-\-analyze " " \fIPASSWORD\fR +Analyze a password for strength and breach exposure. +.TP +.BR \-\-apikeys +Show the API key configuration dashboard. Displays configured and unconfigured +keys for all supported services. +.TP +.BR \-\-allow\-leak +Bypass the fail-safe OPSEC kill-switch and allow direct connections even when +a proxy or Tor circuit is unavailable. Use only in controlled environments. +.TP +.BR \-\-tor +Route all requests through the local Tor SOCKS proxy (port 9050). +.TP +.BR \-\-proxy " " \fIURL\fR +Use the specified proxy URL for all requests. +.TP +.BR \-\-threads " " \fIN\fR +Maximum concurrency level (default: 20). +.TP +.BR \-\-timeout " " \fISECONDS\fR +Per-request timeout in seconds (default: 15). +.TP +.BR \-o ", " \-\-output " " \fIFILE\fR +Write results to the specified output file. +.TP +.BR \-\-format " " \fI{json,csv,html,md,pdf}\fR +Output format (default: json). +.TP +.BR \-\-diff +Compare the current scan results against the last cached scan for the same +target and display only new findings. Records already present in the local +SQLite cache are suppressed. Useful for recurring exposure monitoring. +.SH FILES +.TP +.I ~/.nox/config.ini +Per-user configuration file. Created automatically on first run. +Contains \fB[settings]\fR (concurrency, timeout, stealth, rate limits) +and \fB[api_keys]\fR sections. +.TP +.I /etc/nox/config.ini +System-wide configuration file. Used as fallback when the per-user file +does not exist. +.TP +.I ~/.nox/sources/ +Directory containing JSON source definition files. +.TP +.I ~/.nox/reports/ +Default output directory for generated reports. +.TP +.I ~/.nox/logs/nox.log +Application log file. 
+.SH BUGS +Report bugs at https://github.com/nox-project/nox-framework/issues +.SH AUTHOR +nox-project diff --git a/nox-wrapper.sh b/nox-wrapper.sh new file mode 100755 index 0000000..cbb4b30 --- /dev/null +++ b/nox-wrapper.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +set -e + +VENV="/opt/nox-cli/.venv" +NOX="/opt/nox-cli/nox.py" + +if [[ ! -f "$VENV/bin/python" ]]; then + echo "[!] NOX Framework venv missing at $VENV — reinstall: sudo dpkg -i nox-cli_*.deb" >&2 + exit 1 +fi + +export PYTHONPATH="/opt/nox-cli:${PYTHONPATH:-}" +export NOX_PROG_NAME="nox-cli" +exec "$VENV/bin/python" "$NOX" "$@" diff --git a/nox.py b/nox.py new file mode 100644 index 0000000..8a91fdf --- /dev/null +++ b/nox.py @@ -0,0 +1,7311 @@ +#!/usr/bin/env python3 +""" +NOX — Cyber Threat Intelligence Framework +Async core | 120+ breach sources | Risk scoring | Identity graphing | HVT detection +""" + +import asyncio +import hashlib +import html as html_module +import json +import sys as _sys + +# ── Global namespace injection — location-agnostic path anchor ───────── +# Resolves the package root whether NOX is run from /usr/bin, /home, or /tmp. 
+# Canonical install: /usr/lib/python3/dist-packages/nox/nox.py +# Dev/source run: /nox.py +import pathlib as _pl +_SCRIPT_DIR = _pl.Path(__file__).resolve().parent +_INSTALL_PKG = _pl.Path("/usr/lib/python3/dist-packages/nox") +_PKG_ROOT = _INSTALL_PKG if _SCRIPT_DIR == _INSTALL_PKG else _SCRIPT_DIR +if str(_PKG_ROOT) not in _sys.path: + _sys.path.insert(0, str(_PKG_ROOT)) + +# ── Credential helper (XDG JSON store) ──────────────────────────────── +try: + from sources.helpers.config_handler import ( # type: ignore + ConfigManager as _ExtConfigManager, + UNIVERSAL_PLACEHOLDER, + SERVICE_REGISTRY, + ) + _HAS_CONFIG_HANDLER = True +except ImportError: + _HAS_CONFIG_HANDLER = False + UNIVERSAL_PLACEHOLDER = "INSERT_API_KEY_HERE" + SERVICE_REGISTRY = {} + _ExtConfigManager = None + +try: + from sources.helpers.cracker import detect_hash # type: ignore + _HAS_CRACKER = True +except ImportError: + _HAS_CRACKER = False + def detect_hash(v): # type: ignore + return None + +try: + from sources.helpers.scanner import AvalancheScanner # type: ignore + _HAS_AVALANCHE = True +except ImportError: + _HAS_AVALANCHE = False + AvalancheScanner = None # type: ignore + +try: + from sources.helpers.reporting import ( # type: ignore + to_json as _rep_json, + to_html as _rep_html, + to_pdf as _rep_pdf, + ) + _HAS_REPORTING = True +except ImportError: + _HAS_REPORTING = False +import os +import random +import re +import sys +import time +import threading +# Module-level lock for thread-safe proxy env var assignment (Bug 9 fix) +_PROXY_ENV_LOCK = threading.Lock() +import argparse +import csv +import logging +import math +import tempfile +import urllib.parse +import urllib.request +import urllib.error +import http.cookiejar +import gzip +import ssl +import base64 +from abc import ABC, abstractmethod +from contextlib import contextmanager + +aiosqlite = None +try: + import aiosqlite as _aiosqlite + aiosqlite = _aiosqlite +except ImportError: + pass +import sqlite3 as _sqlite3_fallback +from 
def initialize_environment() -> None:
    """
    Prepare the per-user ~/.nox working tree.

    * Creates the directory tree (logs, reports, sources, vault, providers).
    * When running as root through sudo (SUDO_UID set to a non-root uid),
      hands ownership of every directory and every file created here back
      to the invoking user, so a later unprivileged run can write to them.
    * Writes a default config.ini on first run if not present.
    * Seeds ~/.nox/sources/ from the package's sources/ directory, falling
      back to /usr/share/nox-cli/sources/. Only plugin files that are
      missing are copied; existing ones are never overwritten.
    """
    import shutil

    provider_dir = HOME_NOX / "providers"
    all_dirs = (HOME_NOX, LOG_DIR, REPORT_DIR, SOURCE_DIR, VAULT_DIR, provider_dir)

    # Create all required directories
    for d in all_dirs:
        d.mkdir(mode=0o755, parents=True, exist_ok=True)

    # Ownership fix: if run as root via sudo, re-own to the real user
    real_uid = int(os.environ.get("SUDO_UID", os.getuid()))
    real_gid = int(os.environ.get("SUDO_GID", os.getgid()))
    fix_owner = os.getuid() == 0 and real_uid != 0

    def _reown(path) -> None:
        # Best-effort: an ownership failure must never block start-up.
        if not fix_owner:
            return
        try:
            os.chown(path, real_uid, real_gid)
        except OSError:
            pass

    # Includes providers/, which is created above like the other directories.
    for d in all_dirs:
        _reown(d)

    # Create default config.ini on first run
    _default_cfg = HOME_NOX / "config.ini"
    if not _default_cfg.exists():
        import configparser as _cp
        cfg = _cp.ConfigParser()
        cfg["settings"] = {
            "concurrency": "20",
            "timeout": "30",
            "stealth": "true",
            "rate_limit_lo": "0.5",
            "rate_limit_hi": "2.0",
        }
        cfg["api_keys"] = {}
        with open(_default_cfg, "w", encoding="utf-8") as fh:
            cfg.write(fh)
        # Created after the directory pass, so it needs its own ownership fix.
        _reown(_default_cfg)

    # Smart source discovery: seed ~/.nox/sources/ from package sources/
    # B6: only copy if destination is absent — never silently overwrite
    # user-customised sources. Use --reset-sources to force a full resync.
    candidate = _PKG_ROOT / "sources"
    if not candidate.is_dir():
        candidate = Path("/usr/share/nox-cli/sources")
    if candidate.is_dir():
        for jf in candidate.glob("*.json"):
            dst = SOURCE_DIR / jf.name
            try:
                if not dst.exists():
                    shutil.copy2(jf, dst)
                    _reown(dst)
            except OSError:
                # Unreadable package file or unwritable target: skip this
                # plugin and keep seeding the rest.
                pass
class C:
    """ANSI escape sequences used for terminal colouring, plus a wrap helper."""

    R = "\033[91m"          # bright red
    G = "\033[92m"          # bright green
    Y = "\033[93m"          # bright yellow
    B = "\033[94m"          # bright blue
    P = "\033[95m"          # bright magenta ("purple")
    CY = "\033[96m"         # bright cyan
    W = "\033[97m"          # bright white
    GR = "\033[90m"         # gray
    O = "\033[38;5;208m"    # 256-colour palette entry 208 ("orange")
    BD = "\033[1m"          # bold
    DM = "\033[2m"          # dim
    X = "\033[0m"           # reset all attributes

    # Colour name -> attribute name on this class. Built once instead of on
    # every c() call; the escape code itself is still read at call time, so
    # reassigning e.g. C.R later keeps taking effect.
    _COLOR_ATTRS = {
        "red": "R", "green": "G", "yellow": "Y", "blue": "B",
        "purple": "P", "cyan": "CY", "white": "W", "gray": "GR",
        "orange": "O", "bold": "BD", "dim": "DM",
    }

    @staticmethod
    def c(t: str, color: str = "W") -> str:
        """Return *t* wrapped in the escape code for *color*, followed by a reset.

        *color* is one of the names in ``_COLOR_ATTRS`` ("red", "green", ...).
        Any other value, including the default ``"W"``, falls back to white.
        """
        attr = C._COLOR_ATTRS.get(color, "W")
        return f"{getattr(C, attr)}{t}{C.X}"
def out(level: str, msg: str) -> None:
    """Print *msg* to the terminal and mirror it to the log file.

    Terminal side: if ``Console`` has a callable attribute named *level*
    (e.g. ``ok``, ``err``, ``warn``, ``dim``, ``section``) it is called with
    *msg*. Otherwise the message is printed through ``Console.s`` using
    *level* as the icon key, so levels such as ``pivot``, ``breach``,
    ``dork``, ``paste`` and ``scrape`` get their own icon; an unknown key
    falls back to the info icon inside ``Console.s``.

    Log side: the message, with ANSI colour codes stripped, is written to
    the ``nox`` logger as ``[level] message``. ``err`` logs at ERROR,
    ``warn`` at WARNING, the listed informational levels at INFO and
    everything else at DEBUG.
    """
    fn = getattr(Console, level, None)
    if callable(fn):
        fn(msg)
    else:
        # Pass the level through as the icon key; previously the icon was
        # dropped and every such message rendered with the info icon.
        Console.s(msg, level)

    # Mirror every terminal message to the log file so users can audit the full run.
    clean = _ANSI_RE.sub("", msg)
    if level == "err":
        log = logger.error
    elif level == "warn":
        log = logger.warning
    elif level in ("ok", "info", "pivot", "breach", "scrape", "dork", "paste"):
        log = logger.info
    else:
        log = logger.debug
    log("[%s] %s", level, clean)
"PassiveTotal": 0.75, + "AbuseIPDB": 0.78, "GreyNoise": 0.75, "MXToolbox": 0.65, + "WhoisXML": 0.60, "URLScan": 0.65, "ExploitDB": 0.70, + "ThreatBook": 0.68, "Huntress": 0.72, + "StealerLogSearch": 0.90, "IntelXPhone": 0.80, "IntelFinder": 0.75, + "BreachForumsIntel": 0.60, "RaidForumsArchive": 0.55, "OGUsers": 0.50, + "Cracked.to": 0.55, "Nulled.to": 0.55, "DarkWebTor": 0.50, + "WikiLeaks": 0.75, "RansomWatch": 0.85, "DataBreaches.net": 0.55, + "PastebinIntel": 0.35, "PasteHunter": 0.35, "ScrapeEngine": 0.30, + "TelegramOSINT": 0.30, "GoogleDork": 0.30, "SynapsInt": 0.40, + "WaybackMachine": 0.40, "BuiltWith": 0.40, "CertStream": 0.45, + "GitLeaks": 0.65, "SPF/DMARC": 0.40, "Picostatus": 0.30, + "LeakedDomains": 0.60, "Leakix": 0.72, + "PhoneInfo": 0.55, "Numverify": 0.60, "TrueCaller": 0.65, + "Hashmob": 0.95, "HashKiller": 0.90, "HashesOrg": 0.90, + "LeakLookupHash": 0.80, +} + +_STEALER_TAGS = {"stealer", "redline", "raccoon", "vidar", "infostealer", "lumma", "azorult", "stealc"} +_FAST_HASHES = {"md5", "sha1", "sha256", "ntlm", "lm"} +_CORP_PW_RE = re.compile(r"(?i)([A-Z][a-z]{2,})(20\d{2}|19\d{2})[!@#$%^&*]?$") +_VIP_EMAIL_RE = re.compile(r"(?i)(admin|administrator|root|ceo|cto|ciso|cfo|vp|director|manager|sysadmin|devops|security|infosec|noc|soc)") +_VIP_DOM_RE = re.compile(r"\.(gov|mil|edu|police|gouv|gob)(\.[a-z]{2})?$", re.I) +_HVT_KEYWORDS = frozenset({ + "admin", "administrator", "root", "ceo", "cto", "ciso", "cfo", + "vp", "director", "manager", "sysadmin", "devops", "security", + "infosec", "noc", "soc", "superuser", "sa", "dba", "ops", +}) +_HVT_DOMAINS = re.compile( + r"\.(gov|mil|int|police|gouv|gob|gc\.ca|gov\.uk|mod\.uk)(\.[a-z]{2})?$", + re.IGNORECASE, +) + +_INTEL_SCHEMA = """ +CREATE TABLE IF NOT EXISTS identities ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + primary_id TEXT NOT NULL UNIQUE, + emails TEXT DEFAULT '[]', + usernames TEXT DEFAULT '[]', + phones TEXT DEFAULT '[]', + max_risk REAL DEFAULT 0.0, + is_hvt INTEGER DEFAULT 0, + 
pivot_count TEXT DEFAULT '{}', + ts REAL DEFAULT (strftime('%s','now')) +); +CREATE TABLE IF NOT EXISTS leaks ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + identity_id INTEGER REFERENCES identities(id) ON DELETE CASCADE, + source TEXT, + email TEXT, + username TEXT, + password TEXT, + password_hash TEXT, + hash_type TEXT, + phone TEXT, + breach_name TEXT, + breach_date TEXT, + risk_score REAL DEFAULT 0, + source_conf REAL DEFAULT 0.5, + data_types TEXT DEFAULT '[]', + is_hvt INTEGER DEFAULT 0, + dedup_hash TEXT UNIQUE, + ts REAL DEFAULT (strftime('%s','now')) +); +CREATE TABLE IF NOT EXISTS correlation_links ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + identity_id INTEGER REFERENCES identities(id) ON DELETE CASCADE, + pivot_type TEXT, + pivot_value TEXT, + linked_ids TEXT DEFAULT '[]', + ts REAL DEFAULT (strftime('%s','now')) +); +CREATE TABLE IF NOT EXISTS query_cache ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + query TEXT NOT NULL UNIQUE, + qtype TEXT, + scanned REAL DEFAULT (strftime('%s','now')) +); +CREATE INDEX IF NOT EXISTS idx_leaks_email ON leaks(email); +CREATE INDEX IF NOT EXISTS idx_leaks_identity ON leaks(identity_id); +CREATE INDEX IF NOT EXISTS idx_leaks_risk ON leaks(risk_score DESC); +CREATE INDEX IF NOT EXISTS idx_leaks_dedup ON leaks(dedup_hash); +CREATE INDEX IF NOT EXISTS idx_ident_hvt ON identities(is_hvt); +CREATE INDEX IF NOT EXISTS idx_cache_query ON query_cache(query); +CREATE TABLE IF NOT EXISTS intel_records ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source TEXT, target TEXT, email TEXT, + password TEXT, phone TEXT, address TEXT, + full_name TEXT, fingerprint TEXT UNIQUE +); +CREATE TABLE IF NOT EXISTS dork_results ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source_url TEXT UNIQUE, + file_type TEXT, + metadata_json TEXT, + parent_target TEXT, + ts REAL DEFAULT (strftime('%s','now')) +); +""" + + +def _parse_breach_date(raw: str) -> Optional[datetime]: + if not raw: + return None + raw = raw.strip() + for fmt in ("%Y-%m-%dT%H:%M:%S", 
"%Y-%m-%d %H:%M:%S", "%Y-%m-%d"): + try: + return datetime.strptime(raw[:19], fmt).replace(tzinfo=timezone.utc) + except ValueError: + pass + m = re.search(r"(\d{2})/(\d{2})/(\d{4})", raw) + if m: + # Try MM/DD/YYYY first, then DD/MM/YYYY (European format) + for month, day in [(int(m.group(1)), int(m.group(2))), (int(m.group(2)), int(m.group(1)))]: + try: + return datetime(int(m.group(3)), month, day, tzinfo=timezone.utc) + except ValueError: + pass + m = re.fullmatch(r"(\d{4})", raw) + if m: + return datetime(int(m.group(1)), 1, 1, tzinfo=timezone.utc) + return None + + +# ── Shared helpers ───────────────────────────────────────────────────── +def _rec_get(r: Any, k: str) -> Any: + return r.get(k, "") if isinstance(r, dict) else getattr(r, k, "") + + +def _is_vip(r: Any) -> bool: + ident = _rec_get(r, "email") or _rec_get(r, "username") + return bool(_VIP_EMAIL_RE.search(ident) or _VIP_DOM_RE.search(ident)) + + +def _is_stealer(r: Any) -> bool: + dt = _rec_get(r, "data_types") or [] + combined = ( + (" ".join(dt) if isinstance(dt, list) else str(dt)).lower() + + _rec_get(r, "source").lower() + ) + return any(t in combined for t in _STEALER_TAGS) + + +# ── Record dataclass ─────────────────────────────────────────────────── +@dataclass +class Record: + source: str + email: str = "" + username: str = "" + password: str = "" + password_hash: str = "" + hash_type: str = "" + ip_address: str = "" + phone: str = "" + name: str = "" + domain: str = "" + breach_date: str = "" + breach_name: str = "" + data_types: List[str] = field(default_factory=list) + severity: Severity = Severity.MEDIUM + raw_data: Dict = field(default_factory=dict) + verified: bool = False + timestamp: str = field(default_factory=lambda: datetime.now().isoformat()) + risk_score: float = 0.0 + source_confidence: float = 0.5 + is_hvt: bool = False + persistence_score: float = 0.0 + + address: str = "" + full_name: str = "" + metadata: Dict = field(default_factory=dict) + + def to_dict(self) -> Dict: + 
class RiskEngine:
    """
    Predictive risk scoring engine (0–100).

    Temporal Correlation & Exposure Scoring:
      - Persistence Score: multiplier when data appears across multiple distinct
        datasets in different years.
      - Exposure Recency: exponential multiplier for recent breaches.

    ``severity`` is always derived from ``risk_score`` through
    ``_severity_for`` so the two fields cannot drift apart.
    """

    _DECAY_BOOST_DAYS = 365
    _DECAY_MID_DAYS = 730
    _DECAY_PENALTY_DAYS = 1825

    @staticmethod
    def _severity_for(risk: float) -> "Severity":
        """Map a 0–100 risk score onto the Severity scale."""
        if risk >= 90:
            return Severity.CRITICAL
        if risk >= 70:
            return Severity.HIGH
        if risk >= 40:
            return Severity.MEDIUM
        if risk >= 10:
            return Severity.LOW
        return Severity.INFO

    @staticmethod
    def score(record: "Record") -> "Record":
        """Compute ``risk_score``, ``severity`` and ``source_confidence`` in place.

        Args:
            record: The record to score; it is mutated and returned.

        Returns:
            The same record instance, for chaining.
        """
        conf = _SRC_CONFIDENCE.get(record.source, 0.5)
        record.source_confidence = conf

        dtypes_str = " ".join(record.data_types).lower() if record.data_types else ""
        src_lower = record.source.lower()

        # A cleartext password from an infostealer log is the worst case:
        # short-circuit to the maximum score.
        is_stealer = any(t in dtypes_str or t in src_lower for t in _STEALER_TAGS)
        if is_stealer and record.password:
            record.risk_score = 100.0
            record.severity = Severity.CRITICAL
            return record

        pts = 0.0
        if record.password:
            pts += 60
            # I5: adjust base points by password complexity.
            # Weak passwords (trivially guessable) score lower; strong ones score higher.
            try:
                pa_score = PassAnalyzer().analyze(record.password).get("score", 50)
            except Exception as exc:  # best-effort: keep the base score
                logger.debug("Password analysis failed, keeping base score: %s", exc)
            else:
                if pa_score < 30:
                    pts = max(0.0, pts - 15)
                elif pa_score > 80:
                    pts = min(100.0, pts + 10)
        elif record.password_hash:
            ht = (record.hash_type or "").lower()
            pts += 30 if ht in _FAST_HASHES else 15
        else:
            pts += 5

        dt = _parse_breach_date(record.breach_date)
        if dt:
            # A breach date in the future is bad source data; treat it as
            # "today" so exp() cannot blow the multiplier up.
            age_days = max(0, (datetime.now(timezone.utc) - dt).days)
            if age_days < RiskEngine._DECAY_BOOST_DAYS:
                # Exponential recency multiplier
                recency_factor = 1.0 + 0.5 * math.exp(-age_days / 180)
                pts = pts * recency_factor + 30
            elif age_days < RiskEngine._DECAY_MID_DAYS:
                pts += 15
            elif age_days > RiskEngine._DECAY_PENALTY_DAYS:
                pts = max(0.0, pts - 20)

        # Blend in source confidence: factor ranges from 0.5 to 1.0.
        pts *= 0.5 + conf * 0.5

        ident = record.email or record.username or ""
        local = ident.split("@")[0].lower() if "@" in ident else ident.lower()
        domain_part = ident.split("@")[1].lower() if "@" in ident else ""
        if (
            any(kw in local for kw in _HVT_KEYWORDS)
            or (_HVT_DOMAINS.search(domain_part) if domain_part else False)
            or _VIP_EMAIL_RE.search(ident)
            or _VIP_DOM_RE.search(ident)
        ):
            pts = min(100.0, pts + 15)

        record.risk_score = round(min(pts, 100.0), 1)
        record.severity = RiskEngine._severity_for(record.risk_score)
        return record

    @staticmethod
    def apply_persistence(records: List["Record"]) -> List["Record"]:
        """
        Assign a Persistence Score when the same identity appears across
        multiple distinct breach datasets in different calendar years.

        The bonus is added to ``risk_score`` and ``severity`` is escalated to
        match; an existing severity is never downgraded here.

        Args:
            records: Records to annotate; they are mutated in place.

        Returns:
            The same list, for chaining.
        """
        identity_years: Dict[str, Set[int]] = {}
        identity_sources: Dict[str, Set[str]] = {}

        for r in records:
            ident = (r.email or r.username or "").lower()
            if not ident:
                continue
            identity_sources.setdefault(ident, set()).add(r.source)
            dt = _parse_breach_date(r.breach_date)
            if dt:
                identity_years.setdefault(ident, set()).add(dt.year)

        for r in records:
            ident = (r.email or r.username or "").lower()
            if not ident:
                continue
            years = identity_years.get(ident, set())
            sources = identity_sources.get(ident, set())
            if len(years) < 2 or len(sources) < 2:
                continue
            span = max(years) - min(years)
            r.persistence_score = round(min(100.0, len(sources) * 10 + span * 5), 1)
            r.risk_score = round(min(100.0, r.risk_score + r.persistence_score * 0.3), 1)
            # Keep the label in step with the bumped score. Severity members
            # are declared most-severe first, so a smaller value is worse.
            bumped = RiskEngine._severity_for(r.risk_score)
            if bumped.value < r.severity.value:
                r.severity = bumped
        return records
class IdentityResolver:
    """Groups breach records into TargetProfile clusters using a disjoint-set."""

    def __init__(self, records: list) -> None:
        self._records = records

    def resolve(self) -> List[TargetProfile]:
        """Cluster the records and return the profiles, highest risk first.

        Two records land in the same profile when they share an email,
        username, phone, or a password longer than six characters.
        """
        parent: Dict[str, str] = {}
        first_owner: Dict[str, str] = {}

        def node_of(rec: Any) -> Any:
            # The first non-empty identifier names the record's node.
            for field_name in ("email", "username", "phone", "source"):
                value = _rec_get(rec, field_name)
                if value:
                    return value
            return _rec_get(rec, "source")

        def find(item: str) -> str:
            # Walk to the representative, halving the path on the way up.
            while True:
                up = parent.get(item, item)
                if up == item:
                    return item
                parent[item] = parent.get(up, item)
                item = parent[item]

        # Pass 1: link every record's node to the first node seen per pivot.
        for rec in self._records:
            node = node_of(rec)
            if not node:
                continue
            parent.setdefault(node, node)
            password = _rec_get(rec, "password")
            pivots = (
                _rec_get(rec, "email"),
                _rec_get(rec, "username"),
                _rec_get(rec, "phone"),
                password if password and len(password) > 6 else None,
            )
            for pivot in pivots:
                if not pivot:
                    continue
                owner = first_owner.setdefault(pivot, node)
                keep, absorb = find(node), find(owner)
                if keep != absorb:
                    parent[absorb] = keep

        # Pass 2: drop each record into the profile of its representative.
        clusters: Dict[str, TargetProfile] = {}
        for rec in self._records:
            node = node_of(rec)
            if not node:
                continue
            root = find(node)
            profile = clusters.get(root)
            if profile is None:
                profile = clusters[root] = TargetProfile(primary_id=root)
            profile._add(rec)

        for profile in clusters.values():
            profile._compute_stuffing_risk()

        return sorted(clusters.values(), key=lambda p: p.max_risk, reverse=True)
"""High-Value Target & VIP detection module.""" + + @staticmethod + def is_hvt(record: Any) -> bool: + ident = _rec_get(record, "email") or _rec_get(record, "username") or "" + local = ident.split("@")[0].lower() if "@" in ident else ident.lower() + domain_part = ident.split("@")[1].lower() if "@" in ident else "" + if any(kw in local for kw in _HVT_KEYWORDS): + return True + if domain_part and _HVT_DOMAINS.search(domain_part): + return True + if _VIP_EMAIL_RE.search(ident) or _VIP_DOM_RE.search(ident): + return True + return False + + @staticmethod + def filter_hvt(records: list) -> list: + hvt = [r for r in records if HVTAnalyzer.is_hvt(r)] + return sorted(hvt, key=lambda r: _rec_get(r, "risk_score") or 0, reverse=True) + + @staticmethod + def annotate(records: list) -> list: + for rec in records: + flag = HVTAnalyzer.is_hvt(rec) + if isinstance(rec, dict): + rec["is_hvt"] = flag + else: + rec.is_hvt = flag + return records + + +# ── Forensic Persistence Layer ───────────────────────────────────────── +class DatabaseManager: + """ + Async aiosqlite persistence layer for CTI data with 24 h query cache + and SHA-256 deduplication. Falls back to synchronous sqlite3 when + aiosqlite is not installed. + """ + + def __init__(self, path: Optional[str] = None) -> None: + self.path = path or str(HOME_NOX / "nox_cache.db") + self._use_async = aiosqlite is not None + # Initialise schema synchronously so the constructor stays non-async. 
def _init_sync(self) -> None:
    """Create or upgrade the schema using the synchronous sqlite3 driver.

    Column/index migrations run first so databases created by older versions
    gain ``leaks.dedup_hash`` before the full schema script is applied. The
    connection is closed even when a statement fails.
    """
    con = _sqlite3_fallback.connect(self.path, timeout=15)
    try:
        con.execute("PRAGMA journal_mode=WAL")
        # Run column migrations before applying full schema (handles existing DBs)
        migrations = (
            "ALTER TABLE leaks ADD COLUMN dedup_hash TEXT",
            "CREATE UNIQUE INDEX IF NOT EXISTS idx_leaks_dedup_unique "
            "ON leaks(dedup_hash) WHERE dedup_hash IS NOT NULL",
        )
        for stmt in migrations:
            try:
                con.execute(stmt)
                con.commit()
            except _sqlite3_fallback.OperationalError:
                # Expected: column already exists, or the table is not there
                # yet (fresh database) and is created by the schema below.
                pass
        con.executescript(_INTEL_SCHEMA)
        con.commit()
    finally:
        con.close()
async def save_record(self, r: "Record") -> None:
    """Persist one record into ``intel_records``, skipping known fingerprints.

    Uses aiosqlite when available, otherwise the synchronous sqlite3 driver.

    Args:
        r: Record-like object exposing ``source``, ``email``, ``password``,
            ``phone``, ``address``, ``full_name`` and ``get_fingerprint()``;
            an optional ``target`` attribute is stored when present.
    """
    sql = (
        "INSERT OR IGNORE INTO intel_records "
        "(source, target, email, password, phone, address, full_name, fingerprint) "
        "VALUES (?, ?, ?, ?, ?, ?, ?, ?)"
    )
    params = (
        r.source, getattr(r, "target", ""), r.email, r.password,
        r.phone, r.address, r.full_name, r.get_fingerprint(),
    )
    if self._use_async:
        async with aiosqlite.connect(self.path, timeout=15) as db:
            await db.execute(sql, params)
            await db.commit()
        return

    con = _sqlite3_fallback.connect(self.path, timeout=15)
    try:
        # ``with con`` only commits or rolls back the transaction; sqlite3
        # connections must still be closed explicitly.
        with con:
            con.execute(sql, params)
    finally:
        con.close()
seen_hashes: Set[str] = set() + async with aiosqlite.connect(self.path, timeout=15) as db: + db.row_factory = aiosqlite.Row + await db.execute("PRAGMA journal_mode=WAL") + try: + await db.execute( + "INSERT OR REPLACE INTO query_cache (query, qtype) VALUES (?,?)", + (query.lower(), qtype), + ) + for rec in records: + dk = rec.dedup_key() if hasattr(rec, "dedup_key") else "" + if dk and dk in seen_hashes: + continue + if dk: + seen_hashes.add(dk) + ident = rec.email or rec.username or rec.phone or query + is_hvt = int(bool(_VIP_EMAIL_RE.search(ident) or _VIP_DOM_RE.search(ident))) + await db.execute( + "INSERT OR IGNORE INTO identities (primary_id, is_hvt) VALUES (?,?)", + (ident, is_hvt), + ) + async with db.execute( + "SELECT id FROM identities WHERE primary_id=?", (ident,) + ) as cur: + row = await cur.fetchone() + if not row: + continue + iid = row["id"] + await db.execute( + """INSERT INTO leaks + (identity_id, source, email, username, password, + password_hash, hash_type, phone, breach_name, + breach_date, risk_score, source_conf, data_types, is_hvt, dedup_hash) + VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""", + ( + iid, rec.source, rec.email, rec.username, + rec.password, rec.password_hash, rec.hash_type, + rec.phone, rec.breach_name, rec.breach_date, + getattr(rec, "risk_score", 0.0), + getattr(rec, "source_confidence", 0.5), + json.dumps(rec.data_types), + is_hvt, dk, + ), + ) + await db.commit() + except Exception as exc: + logger.warning("DB store error: %s", exc) + + async def _save_correlations_async(self, profiles: List[TargetProfile]) -> None: + async with aiosqlite.connect(self.path, timeout=15) as db: + db.row_factory = aiosqlite.Row + await db.execute("PRAGMA journal_mode=WAL") + try: + for profile in profiles: + await db.execute( + """UPDATE identities + SET emails=?, usernames=?, phones=?, + max_risk=?, is_hvt=?, pivot_count=? 
+ WHERE primary_id=?""", + ( + json.dumps(profile.emails), + json.dumps(profile.usernames), + json.dumps(profile.phones), + profile.max_risk, + int(profile.is_hvt), + json.dumps(profile.pivot_count), + profile.primary_id, + ), + ) + async with db.execute( + "SELECT id FROM identities WHERE primary_id=?", (profile.primary_id,) + ) as cur: + row = await cur.fetchone() + if not row: + continue + iid = row["id"] + for pivot_val, count in profile.pivot_count.items(): + if count > 1: + # I6: use Detect.qtype instead of length heuristic + _ptype = Detect.qtype(pivot_val) + if _ptype not in ("email", "username", "phone", "domain", "ip"): + _ptype = "username" + await db.execute( + """INSERT INTO correlation_links + (identity_id, pivot_type, pivot_value, linked_ids) + VALUES (?,?,?,?)""", + ( + iid, + _ptype, + pivot_val[:64], + json.dumps(profile.emails[:10]), + ), + ) + await db.commit() + except Exception as exc: + logger.warning("DB correlation error: %s", exc) + + # ── Synchronous fallbacks (used when aiosqlite is absent) ───────── + + def _get_cached_sync(self, q_lower: str) -> Optional[List[dict]]: + con = _sqlite3_fallback.connect(self.path, timeout=15) + con.row_factory = _sqlite3_fallback.Row + con.execute("PRAGMA journal_mode=WAL") + try: + row = con.execute( + "SELECT id, scanned FROM query_cache WHERE query=?", (q_lower,) + ).fetchone() + if not row: + return None + if datetime.now(timezone.utc).timestamp() - row["scanned"] > Cfg.CACHE_TTL: + return None + return [ + dict(r) for r in con.execute( + "SELECT * FROM leaks WHERE email=? 
def _cache_records_sync(self, query: str, qtype: str, records: list) -> None:
    """Store scan results and refresh the query-cache entry (sqlite3 path).

    Records are de-duplicated by ``dedup_key()`` within the batch, and
    against rows already stored via ``INSERT OR IGNORE`` on the unique
    ``dedup_hash`` column, so re-scanning a target no longer aborts the batch
    with an IntegrityError.

    Args:
        query: The scanned target; stored lower-cased as the cache key and
            used as identity when a record has no email/username/phone.
        qtype: Query type label stored next to the cache entry.
        records: Record objects to persist.
    """
    con = _sqlite3_fallback.connect(self.path, timeout=15)
    seen_hashes: Set[str] = set()
    try:
        con.row_factory = _sqlite3_fallback.Row
        con.execute("PRAGMA journal_mode=WAL")
        # REPLACE re-creates the row, which resets ``scanned`` to "now".
        con.execute(
            "INSERT OR REPLACE INTO query_cache (query, qtype) VALUES (?,?)",
            (query.lower(), qtype),
        )
        for rec in records:
            dk = rec.dedup_key() if hasattr(rec, "dedup_key") else ""
            if dk:
                if dk in seen_hashes:
                    continue
                seen_hashes.add(dk)
            ident = rec.email or rec.username or rec.phone or query
            is_hvt = int(bool(_VIP_EMAIL_RE.search(ident) or _VIP_DOM_RE.search(ident)))
            con.execute(
                "INSERT OR IGNORE INTO identities (primary_id, is_hvt) VALUES (?,?)",
                (ident, is_hvt),
            )
            row = con.execute(
                "SELECT id FROM identities WHERE primary_id=?", (ident,)
            ).fetchone()
            if not row:
                continue
            con.execute(
                """INSERT OR IGNORE INTO leaks
                   (identity_id, source, email, username, password,
                    password_hash, hash_type, phone, breach_name,
                    breach_date, risk_score, source_conf, data_types, is_hvt, dedup_hash)
                   VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""",
                (
                    row["id"], rec.source, rec.email, rec.username,
                    rec.password, rec.password_hash, rec.hash_type,
                    rec.phone, rec.breach_name, rec.breach_date,
                    getattr(rec, "risk_score", 0.0),
                    getattr(rec, "source_confidence", 0.5),
                    json.dumps(rec.data_types),
                    is_hvt,
                    # NULL (not '') when there is no key: NULLs never collide
                    # in a UNIQUE column, empty strings do.
                    dk or None,
                ),
            )
        con.commit()
    except _sqlite3_fallback.Error as exc:
        # Best-effort cache: log any database error instead of failing the scan.
        logger.warning("DB store error: %s", exc)
    finally:
        con.close()
max_risk=?, is_hvt=?, pivot_count=? + WHERE primary_id=?""", + ( + json.dumps(profile.emails), + json.dumps(profile.usernames), + json.dumps(profile.phones), + profile.max_risk, + int(profile.is_hvt), + json.dumps(profile.pivot_count), + profile.primary_id, + ), + ) + row = con.execute( + "SELECT id FROM identities WHERE primary_id=?", (profile.primary_id,) + ).fetchone() + if not row: + continue + iid = row["id"] + for pivot_val, count in profile.pivot_count.items(): + if count > 1: + # I6: use Detect.qtype instead of length heuristic + _ptype = Detect.qtype(pivot_val) + if _ptype not in ("email", "username", "phone", "domain", "ip"): + _ptype = "username" + con.execute( + """INSERT INTO correlation_links + (identity_id, pivot_type, pivot_value, linked_ids) + VALUES (?,?,?,?)""", + ( + iid, + _ptype, + pivot_val[:64], + json.dumps(profile.emails[:10]), + ), + ) + con.commit() + except _sqlite3_fallback.OperationalError as exc: + logger.warning("DB correlation error: %s", exc) + finally: + con.close() + + def _get_hvt_sync(self) -> List[dict]: + con = _sqlite3_fallback.connect(self.path, timeout=15) + con.row_factory = _sqlite3_fallback.Row + con.execute("PRAGMA journal_mode=WAL") + try: + return [ + dict(r) for r in con.execute( + "SELECT * FROM identities WHERE is_hvt=1 ORDER BY max_risk DESC" + ).fetchall() + ] + finally: + con.close() + + +# ── Legacy DB (backward-compatible) ─────────────────────────────────── +class DB: + """ + Legacy synchronous DB facade. Internally uses aiosqlite when available, + running coroutines via a dedicated background event loop so callers + remain synchronous. Falls back to sqlite3 when aiosqlite is absent. 
+ """ + + def __init__(self, path=None): + self.path = str(path or Cfg.DB) + self._use_async = aiosqlite is not None + if self._use_async: + import threading as _threading + self._loop = asyncio.new_event_loop() + self._loop_thread = _threading.Thread( + target=self._loop.run_forever, daemon=True, name="nox-db-loop" + ) + self._loop_thread.start() + self._init() + + # ── Internal helpers ────────────────────────────────────────────── + + def _run(self, coro): + """Submit a coroutine to the background loop and block until done.""" + fut = asyncio.run_coroutine_threadsafe(coro, self._loop) + return fut.result(timeout=60) + + async def _exec(self, sql: str, params: tuple = ()) -> None: + async with aiosqlite.connect(self.path, timeout=15) as db: + await db.execute("PRAGMA journal_mode=WAL") + await db.execute(sql, params) + await db.commit() + + async def _fetchone(self, sql: str, params: tuple = ()) -> Optional[dict]: + async with aiosqlite.connect(self.path, timeout=15) as db: + db.row_factory = aiosqlite.Row + await db.execute("PRAGMA journal_mode=WAL") + async with db.execute(sql, params) as cur: + row = await cur.fetchone() + return dict(row) if row else None + + async def _fetchall(self, sql: str, params: tuple = ()) -> List[dict]: + async with aiosqlite.connect(self.path, timeout=15) as db: + db.row_factory = aiosqlite.Row + await db.execute("PRAGMA journal_mode=WAL") + async with db.execute(sql, params) as cur: + rows = await cur.fetchall() + return [dict(r) for r in rows] + + async def _init_async(self) -> None: + async with aiosqlite.connect(self.path, timeout=15) as db: + await db.execute("PRAGMA journal_mode=WAL") + await db.executescript(""" + CREATE TABLE IF NOT EXISTS breach_cache ( + id INTEGER PRIMARY KEY AUTOINCREMENT, query TEXT NOT NULL, + source TEXT NOT NULL, data TEXT NOT NULL, ts REAL NOT NULL, + ttl INTEGER DEFAULT 86400, UNIQUE(query, source)); + CREATE TABLE IF NOT EXISTS credentials ( + id INTEGER PRIMARY KEY AUTOINCREMENT, email TEXT, 
username TEXT, + password TEXT, password_hash TEXT, hash_type TEXT, source TEXT, + breach_name TEXT, breach_date TEXT, ts REAL DEFAULT (strftime('%s','now')), + UNIQUE(email, password_hash, source)); + CREATE TABLE IF NOT EXISTS hash_cache ( + hash TEXT PRIMARY KEY, hash_type TEXT, plaintext TEXT, + source TEXT, ts REAL DEFAULT (strftime('%s','now'))); + CREATE TABLE IF NOT EXISTS api_keys ( + service TEXT PRIMARY KEY, key TEXT NOT NULL, + ts REAL DEFAULT (strftime('%s','now'))); + CREATE TABLE IF NOT EXISTS scans ( + id INTEGER PRIMARY KEY AUTOINCREMENT, query TEXT, qtype TEXT, + results INTEGER, sources INTEGER, duration REAL, + ts REAL DEFAULT (strftime('%s','now'))); + CREATE TABLE IF NOT EXISTS dork_cache ( + id INTEGER PRIMARY KEY AUTOINCREMENT, query TEXT, engine TEXT, + dork TEXT, results TEXT, ts REAL DEFAULT (strftime('%s','now'))); + CREATE TABLE IF NOT EXISTS paste_cache ( + id INTEGER PRIMARY KEY AUTOINCREMENT, query TEXT, site TEXT, + pid TEXT, content TEXT, ts REAL DEFAULT (strftime('%s','now')), + UNIQUE(query, site, pid)); + CREATE TABLE IF NOT EXISTS wordlists ( + id INTEGER PRIMARY KEY AUTOINCREMENT, target TEXT, + data TEXT, ts REAL DEFAULT (strftime('%s','now'))); + CREATE TABLE IF NOT EXISTS config ( + key TEXT PRIMARY KEY, value TEXT); + CREATE INDEX IF NOT EXISTS idx_cred_email ON credentials(email); + CREATE INDEX IF NOT EXISTS idx_cred_user ON credentials(username); + CREATE INDEX IF NOT EXISTS idx_cred_hash ON credentials(password_hash); + CREATE INDEX IF NOT EXISTS idx_cache_q ON breach_cache(query); + """) + await db.commit() + + # ── Sync fallback helpers ───────────────────────────────────────── + + @contextmanager + def _conn(self): + c = _sqlite3_fallback.connect(self.path, timeout=15) + c.row_factory = _sqlite3_fallback.Row + c.execute("PRAGMA journal_mode=WAL") + try: + yield c + c.commit() + finally: + c.close() + + def _init_sync(self): + with self._conn() as c: + c.executescript(""" + CREATE TABLE IF NOT EXISTS breach_cache ( + 
id INTEGER PRIMARY KEY AUTOINCREMENT, query TEXT NOT NULL, + source TEXT NOT NULL, data TEXT NOT NULL, ts REAL NOT NULL, + ttl INTEGER DEFAULT 86400, UNIQUE(query, source)); + CREATE TABLE IF NOT EXISTS credentials ( + id INTEGER PRIMARY KEY AUTOINCREMENT, email TEXT, username TEXT, + password TEXT, password_hash TEXT, hash_type TEXT, source TEXT, + breach_name TEXT, breach_date TEXT, ts REAL DEFAULT (strftime('%s','now')), + UNIQUE(email, password_hash, source)); + CREATE TABLE IF NOT EXISTS hash_cache ( + hash TEXT PRIMARY KEY, hash_type TEXT, plaintext TEXT, + source TEXT, ts REAL DEFAULT (strftime('%s','now'))); + CREATE TABLE IF NOT EXISTS api_keys ( + service TEXT PRIMARY KEY, key TEXT NOT NULL, + ts REAL DEFAULT (strftime('%s','now'))); + CREATE TABLE IF NOT EXISTS scans ( + id INTEGER PRIMARY KEY AUTOINCREMENT, query TEXT, qtype TEXT, + results INTEGER, sources INTEGER, duration REAL, + ts REAL DEFAULT (strftime('%s','now'))); + CREATE TABLE IF NOT EXISTS dork_cache ( + id INTEGER PRIMARY KEY AUTOINCREMENT, query TEXT, engine TEXT, + dork TEXT, results TEXT, ts REAL DEFAULT (strftime('%s','now'))); + CREATE TABLE IF NOT EXISTS paste_cache ( + id INTEGER PRIMARY KEY AUTOINCREMENT, query TEXT, site TEXT, + pid TEXT, content TEXT, ts REAL DEFAULT (strftime('%s','now')), + UNIQUE(query, site, pid)); + CREATE TABLE IF NOT EXISTS wordlists ( + id INTEGER PRIMARY KEY AUTOINCREMENT, target TEXT, + data TEXT, ts REAL DEFAULT (strftime('%s','now'))); + CREATE TABLE IF NOT EXISTS config ( + key TEXT PRIMARY KEY, value TEXT); + CREATE INDEX IF NOT EXISTS idx_cred_email ON credentials(email); + CREATE INDEX IF NOT EXISTS idx_cred_user ON credentials(username); + CREATE INDEX IF NOT EXISTS idx_cred_hash ON credentials(password_hash); + CREATE INDEX IF NOT EXISTS idx_cache_q ON breach_cache(query); + """) + + # ── Schema init dispatcher ──────────────────────────────────────── + + def _init(self): + if self._use_async: + self._run(self._init_async()) + else: + 
def store_cred(self, rec):
    """Insert a credential row unless an equivalent one is already stored.

    Hashed records are de-duplicated by the table's
    ``UNIQUE(email, password_hash, source)`` constraint. Cleartext-only
    records are de-duplicated on ``(email, password, source)`` and stored
    with a NULL ``password_hash``: an empty string would collide in the
    UNIQUE constraint and silently drop every further distinct password for
    the same email/source, whereas NULLs are always distinct.

    Args:
        rec: Record-like object with ``email``, ``username``, ``password``,
            ``password_hash``, ``hash_type``, ``source``, ``breach_name``
            and ``breach_date`` attributes.
    """
    columns = "(email,username,password,password_hash,hash_type,source,breach_name,breach_date)"
    if rec.password_hash:
        sql = f"INSERT OR IGNORE INTO credentials {columns} VALUES (?,?,?,?,?,?,?,?)"
        params = (
            rec.email, rec.username, rec.password, rec.password_hash,
            rec.hash_type, rec.source, rec.breach_name, rec.breach_date,
        )
    else:
        # ``IS`` is SQLite's NULL-safe equality, so a missing email or source
        # still matches an already stored row.
        sql = (
            f"INSERT OR IGNORE INTO credentials {columns} "
            "SELECT ?,?,?,?,?,?,?,? WHERE NOT EXISTS "
            "(SELECT 1 FROM credentials WHERE email IS ? AND password IS ? AND source IS ?)"
        )
        params = (
            rec.email, rec.username, rec.password, None,
            rec.hash_type, rec.source, rec.breach_name, rec.breach_date,
            rec.email, rec.password, rec.source,
        )
    if self._use_async:
        self._run(self._exec(sql, params))
    else:
        with self._conn() as c:
            c.execute(sql, params)
def get_creds(self, q):
    """Return stored credential rows whose email or username matches *q*.

    The match is case-insensitive (``COLLATE NOCASE``, ASCII only).  The
    query is lower-cased before lookup, while ``store_cred`` writes email
    and username exactly as supplied, so an exact ``=`` comparison would
    miss rows stored with mixed case.

    Args:
        q: email address or username to look up.

    Returns:
        Matching rows ordered by ``ts`` descending.  On the synchronous
        path each row is converted to a ``dict``; on the async path the
        result of ``self._fetchall`` is returned as-is (presumably the
        same shape — verify against that helper).
    """
    sql = ("SELECT * FROM credentials "
           "WHERE email=? COLLATE NOCASE OR username=? COLLATE NOCASE "
           "ORDER BY ts DESC")
    needle = q.lower()
    params = (needle, needle)
    if self._use_async:
        return self._run(self._fetchall(sql, params))
    with self._conn() as c:
        return [dict(r) for r in c.execute(sql, params).fetchall()]
# ── Header randomisation helpers ──────────────────────────────────────
# Candidate values for the per-request headers; one entry of each pool is
# picked at random by _random_headers().
_UA_POOL = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; rv:133.0) Gecko/20100101 Firefox/133.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:133.0) Gecko/20100101 Firefox/133.0",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0",
    "Mozilla/5.0 (iPhone; CPU iPhone OS 18_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1 Mobile/15E148 Safari/604.1",
    "Mozilla/5.0 (Android 15; Mobile; rv:133.0) Gecko/133.0 Firefox/133.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1 Safari/605.1.15",
]

_ACCEPT_LANG_POOL = [
    "en-US,en;q=0.9",
    "en-GB,en;q=0.9,en-US;q=0.8",
    "en-US,en;q=0.8,fr;q=0.5",
    "en-CA,en;q=0.9",
    "en-AU,en;q=0.9,en-US;q=0.8",
]

_SEC_FETCH_DEST_POOL = ["document", "empty", "image", "script", "style"]
_SEC_FETCH_MODE_POOL = ["navigate", "cors", "no-cors", "same-origin"]
_SEC_FETCH_SITE_POOL = ["none", "same-origin", "cross-site", "same-site"]


def _random_headers(extra: Optional[Dict] = None) -> Dict[str, str]:
    """Build a browser-like request header set with randomised values.

    User-Agent, Accept-Language and the three Sec-Fetch-* headers are each
    drawn independently from their module-level pool; the remaining
    headers are fixed.

    Args:
        extra: optional headers merged on top of the generated ones;
            entries with the same name replace the generated value.

    Returns:
        A new dict of header names to values.
    """
    # The draws happen in the same order as the keys appear below, so a
    # seeded RNG yields the same header set as before.
    user_agent = random.choice(_UA_POOL)
    language = random.choice(_ACCEPT_LANG_POOL)
    fetch_dest = random.choice(_SEC_FETCH_DEST_POOL)
    fetch_mode = random.choice(_SEC_FETCH_MODE_POOL)
    fetch_site = random.choice(_SEC_FETCH_SITE_POOL)

    headers = {
        "User-Agent": user_agent,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": language,
        "Accept-Encoding": "gzip, deflate, br",
        "DNT": "1",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Sec-Fetch-Dest": fetch_dest,
        "Sec-Fetch-Mode": fetch_mode,
        "Sec-Fetch-Site": fetch_site,
        "Cache-Control": "max-age=0",
    }
    if not extra:
        return headers
    headers.update(extra)
    return headers
def _key(self) -> str:
    """Look up the API key configured for this source.

    ``self.key_name`` is reduced to a service name by dropping a trailing
    ``"_api_key"`` (when present) and passed to ``self._db.get_key``.

    Returns:
        The value returned by ``self._db.get_key``, or ``""`` when the
        source declares no ``key_name``.
    """
    key_name = self.key_name
    if key_name:
        suffix = "_api_key"
        if key_name.endswith(suffix):
            service = key_name[:-len(suffix)]
        else:
            service = key_name
        return self._db.get_key(service)
    return ""
async def _post(self, session: "aiohttp.ClientSession", url: str, json_data: Dict = None, data: Dict = None, headers: Dict = None, timeout: int = None) -> Tuple[int, str, bytes]:
    """Perform a POST with jitter and retry logic.

    The body is sent as JSON when ``json_data`` is not None; otherwise
    ``data`` (or an empty mapping) is sent as form data.

    Args:
        session: aiohttp client session used to issue the request.
        url: target URL.
        json_data: JSON-serialisable payload; takes precedence over ``data``.
        data: form payload used when ``json_data`` is None.
        headers: extra headers merged over the randomised header set.
        timeout: total timeout in seconds; falls back to the configured
            timeout when falsy.

    Returns:
        ``(status, text, raw_body)`` for the first response that is not a
        429, or ``(0, "", b"")`` once every attempt has failed or been
        rate limited.
    """
    await _jitter(self._config)
    to = aiohttp_mod.ClientTimeout(total=timeout or self._config.timeout) if aiohttp_mod else None
    hdrs = _random_headers(headers)
    # Choose the payload style once; both styles share the request code below.
    if json_data is not None:
        hdrs["Content-Type"] = "application/json"
        payload = {"json": json_data}
    else:
        payload = {"data": data or {}}

    for attempt in range(Cfg.RETRIES):
        rate_limit_wait = None
        try:
            async with self._sem:
                async with session.post(url, headers=hdrs, timeout=to, ssl=_SSL_CTX, **payload) as resp:
                    if resp.status != 429:
                        body = await resp.read()
                        if resp.status >= 400:
                            _syslog.warning("API_ERROR source=%s status=%d url=%s", self.name, resp.status, url[:80])
                        return resp.status, await resp.text(errors="replace"), body
                    fallback = Cfg.RETRY_DELAY * (attempt + 2)
                    try:
                        retry_after = int(resp.headers.get("Retry-After", fallback))
                    except (TypeError, ValueError):
                        # Retry-After may be an HTTP-date rather than a
                        # number of seconds; use the default backoff then.
                        retry_after = int(fallback)
                    _syslog.info("RATE_LIMIT source=%s url=%s retry_after=%ds", self.name, url[:80], retry_after)
                    # Honour the server's hint up to 30 s, the same cap _get applies.
                    rate_limit_wait = max(0, min(retry_after, 30))
        except Exception as exc:
            if attempt < Cfg.RETRIES - 1:
                await asyncio.sleep(Cfg.RETRY_DELAY * (attempt + 1))
                continue
            _syslog.debug("API_FAIL source=%s url=%s error=%s", self.name, url[:80], exc)
        if rate_limit_wait is not None:
            # Sleep only after the semaphore and the response are released,
            # so a rate-limited source does not block a concurrency slot.
            await asyncio.sleep(rate_limit_wait)
    return 0, "", b""
class Detect:
    """Classify a raw query string into a query type.

    The patterns are tried in a fixed order (email, ip, phone, hash, url,
    domain, username, name); the first one that matches wins and anything
    unrecognised is reported as ``"username"``.
    """

    # The domain part accepts hyphens in every label, so addresses such as
    # user@mail.my-corp.com are recognised as email.
    _EMAIL_RE = re.compile(r"^[\w.+-]+@[\w-]+\.[\w.-]+$")
    _IPV4_RE = re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")
    _PHONE_RE = re.compile(r"^(\+?\d{1,3}[\s.-]?)?\(?\d{2,4}\)?[\s.-]?\d{3,4}[\s.-]?\d{3,4}$")
    # Bare hex digests of 32 to 128 characters.
    _HEX_HASH_RE = re.compile(r"^[a-fA-F0-9]{32,128}$")
    # Prefixed hashes: "$2$"/"$2a$"/"$2b$"/"$2y$", "$argon2...", "$1$"/"$5$"/"$6$".
    _PREFIXED_HASH_RE = re.compile(r"^\$(?:2[aby]?\$|argon2|[156]\$)")
    _URL_RE = re.compile(r"^https?://")
    _DOMAIN_RE = re.compile(r"^[a-zA-Z0-9]([a-zA-Z0-9-]*\.)+[a-zA-Z]{2,}$")
    _USERNAME_RE = re.compile(r"^[\w.-]+$")

    @staticmethod
    def qtype(q: str) -> str:
        """Return the query type detected for *q*.

        Args:
            q: raw query; surrounding whitespace is ignored.

        Returns:
            One of ``"email"``, ``"ip"``, ``"phone"``, ``"hash"``,
            ``"url"``, ``"domain"``, ``"username"`` or ``"name"``.
        """
        q = q.strip()
        if Detect._EMAIL_RE.match(q):
            return "email"
        # A dotted quad only counts as an IP when every octet is in range.
        if Detect._IPV4_RE.match(q) and all(0 <= int(o) <= 255 for o in q.split(".")):
            return "ip"
        if Detect._PHONE_RE.match(q):
            return "phone"
        if Detect._HEX_HASH_RE.match(q) or Detect._PREFIXED_HASH_RE.match(q):
            return "hash"
        if Detect._URL_RE.match(q):
            return "url"
        if Detect._DOMAIN_RE.match(q):
            return "domain"
        if len(q) <= 30 and Detect._USERNAME_RE.match(q):
            return "username"
        if " " in q and len(q.split()) >= 2 and len(q) <= 60:
            return "name"
        return "username"
"text": "", "content": b"", + "json": _json, "headers": {}, "url": url, + })() + + def get(self, url: str, extra_headers: Dict = None, timeout: int = None, use_cloudscraper: bool = False): + self._rl() + to = timeout or self.config.timeout + hdrs = self._hdrs(extra_headers) + for attempt in range(Cfg.RETRIES): + try: + if use_cloudscraper and self._cs: + r = self._cs.get(url, headers=hdrs, timeout=to) + elif self._s: + px = {"http": self.proxy, "https": self.proxy} if self.proxy else None + r = self._s.get(url, headers=hdrs, timeout=to, proxies=px) + else: + req = urllib.request.Request(url, headers=hdrs) + raw = self._opener.open(req, timeout=to) + data = raw.read() + if raw.headers.get("Content-Encoding") == "gzip": + data = gzip.decompress(data) + return self._make_response(raw.status, data, dict(raw.headers), raw.url) + if getattr(r, "status_code", 0) == 429: + retry_after = int(r.headers.get("Retry-After", Cfg.RETRY_DELAY * (attempt + 2))) + time.sleep(min(retry_after, 30)) + continue + return r + except Exception as e: + if attempt < Cfg.RETRIES - 1: + time.sleep(Cfg.RETRY_DELAY * (attempt + 1)) + continue + logger.debug("GET fail %s: %s", url, e) + return self._null_response(url) + + def post(self, url: str, data: Dict = None, json_data: Dict = None, extra_headers: Dict = None, timeout: int = None): + self._rl() + to = timeout or self.config.timeout + hdrs = self._hdrs(extra_headers) + for attempt in range(Cfg.RETRIES): + try: + if self._s: + if json_data: + hdrs["Content-Type"] = "application/json" + r = self._s.post(url, json=json_data, headers=hdrs, timeout=to) + else: + r = self._s.post(url, data=data, headers=hdrs, timeout=to) + if getattr(r, "status_code", 0) == 429: + retry_after = int(r.headers.get("Retry-After", Cfg.RETRY_DELAY * (attempt + 2))) + time.sleep(min(retry_after, 30)) + continue + return r + body = json.dumps(json_data).encode() if json_data else urllib.parse.urlencode(data or {}).encode() + hdrs["Content-Type"] = "application/json" if 
class Registry:
    """All intelligence sources are loaded dynamically from sources/*.json by SourceOrchestrator."""

    @classmethod
    def count(cls) -> int:
        """Return the number of sources held by this registry, which is always 0."""
        return 0

    @classmethod
    def get(cls, session: "Session", db: "DB", qt: str = None) -> list:
        """Return the sources for the given query type; always an empty list.

        ``session``, ``db`` and ``qt`` are accepted but not used.
        """
        sources: list = []
        return sources
@classmethod
def get_proxies(cls) -> List[str]:
    """Return the proxy pool, loading or fetching it on first use.

    Lookup order:
      1. the in-memory cache, when it is non-empty;
      2. ``proxies.txt`` in the working directory, keeping only lines that
         start with one of ``_VALID_SCHEMES``;
      3. the public lists queried by ``_fetch_proxies``.

    Returns:
        A copy of the cached proxy URLs, or an empty list when no proxy
        could be obtained (callers then connect directly).
    """
    if cls._cache:
        return list(cls._cache)

    local_file = Path("proxies.txt")
    if local_file.exists():
        entries = []
        for raw_line in local_file.read_text().splitlines():
            candidate = raw_line.strip()
            if candidate and candidate.startswith(tuple(cls._VALID_SCHEMES)):
                entries.append(candidate)
        if entries:
            cls._cache = entries
            out("info", f"[ProxyManager] Loaded {len(entries)} proxies from proxies.txt")
            return list(cls._cache)
        out("warn", "[ProxyManager] proxies.txt found but contains no valid entries — auto-fetching.")

    # No usable local file: warn, then try the public proxy lists.
    print(
        f"\n {C.BD}{C.Y}[!] OPSEC WARNING: Using public auto-fetched proxies. "
        f"For professional engagements, use Tor (--tor) or a private proxies.txt.{C.X}\n"
    )
    fetched = cls._fetch_proxies()
    if not fetched:
        # Nothing could be fetched: fall back to a direct connection.
        print(
            f"\n {C.BD}{C.R}[!] WARNING: Proxy auto-fetch failed. "
            f"Falling back to DIRECT connection — your real IP may be exposed.{C.X}\n"
        )
        cls._cache = []
        return []

    cls._cache = fetched
    out("ok", f"[ProxyManager] Auto-fetched {len(fetched)} proxies.")
    return list(cls._cache)
@classmethod
def _fetch_proxies(cls) -> List[str]:
    """Download a proxy list from the first public source that yields entries.

    Each URL in ``_PROXY_SOURCES`` is tried in order.  Bare ``ip:port``
    lines are turned into ``http://ip:port``; lines that already start
    with one of ``_VALID_SCHEMES`` are kept unchanged; everything else is
    ignored.  Sources that fail to download are logged at debug level and
    skipped.

    Returns:
        At most 200 proxy URLs, or an empty list when every source failed
        or returned nothing usable.
    """
    bare_endpoint = re.compile(r"^\d{1,3}(\.\d{1,3}){3}:\d{2,5}$")
    schemes = tuple(cls._VALID_SCHEMES)
    proxies: List[str] = []
    for url in cls._PROXY_SOURCES:
        try:
            req = urllib.request.Request(url, headers={"User-Agent": "NOX Framework/ProxyManager"})
            # The context manager closes the HTTP response (and its socket)
            # even when reading or decoding fails.
            with urllib.request.urlopen(req, timeout=10) as raw:
                text = raw.read().decode("utf-8", errors="replace")
        except Exception as exc:
            logger.debug("ProxyManager._fetch_proxies source=%s: %s", url, exc)
            continue
        for line in text.splitlines():
            line = line.strip()
            if not line:
                continue
            if bare_endpoint.match(line):
                proxies.append(f"http://{line}")
            elif line.startswith(schemes):
                proxies.append(line)
        if proxies:
            logger.debug("ProxyManager: fetched %d proxies from %s", len(proxies), url)
            # One productive source is enough; later ones are fallbacks only.
            break
    return proxies[:200]
def __init__(self, *args, **kwargs):
    """Initialise the engine and load the proxy pool.

    All arguments are forwarded unchanged to the base-class constructor.
    """
    super().__init__(*args, **kwargs)
    # AsyncSource.__init__ assigns self.name = "Unknown" on the instance,
    # which shadows the name declared on the class; restore the class value
    # so logs and records carry the real source name.
    self.name = type(self).name
    self._dead_proxies: set = set()   # proxies that failed or were banned
    self._proxy_index: int = 0        # rotation cursor into the live proxies
    self.proxies = ProxyManager.get_proxies()
async def _ddg_search(self, query: str, _session=None) -> List[dict]:
    """Run one DuckDuckGo HTML search with proxy rotation (max 3 attempts).

    Each attempt takes the next live proxy from the rotation and opens a
    fresh client session.  A 403/429 response or a transport error marks
    the proxy that was actually used as dead and moves on to the next
    attempt.

    Args:
        query: search expression (a fully expanded dork).
        _session: unused; kept for interface compatibility.

    Returns:
        Up to five ``{"url", "title", "dork"}`` dicts, or an empty list
        when aiohttp is unavailable or every attempt failed.
    """
    if not aiohttp_mod:
        return []
    try:
        from aiohttp_socks import ProxyConnector as _ProxyConnector
    except ImportError:
        _ProxyConnector = None
    url = f"https://html.duckduckgo.com/html/?q={urllib.parse.quote(query)}"
    for _attempt in range(3):
        proxy = self._get_next_proxy()
        headers = {"User-Agent": random.choice(_UA_POOL)}
        request_kwargs = {}
        via_proxy = False
        try:
            if proxy and _ProxyConnector:
                connector = _ProxyConnector.from_url(proxy)
                via_proxy = True
            else:
                connector = aiohttp_mod.TCPConnector(ssl=_SSL_CTX)
                if proxy and proxy.startswith("http://"):
                    # Without aiohttp_socks, plain HTTP proxies can still be
                    # used through aiohttp's native ``proxy`` argument rather
                    # than silently going out on the direct connection.
                    request_kwargs["proxy"] = proxy
                    via_proxy = True
            # Create session once per attempt; close it before the next retry.
            async with aiohttp_mod.ClientSession(connector=connector) as sess:
                async with sess.get(url, headers=headers,
                                    timeout=aiohttp_mod.ClientTimeout(total=12),
                                    **request_kwargs) as resp:
                    if resp.status in (403, 429):
                        if via_proxy:
                            self._dead_proxies.add(proxy)
                            # Peek at the proxy the next attempt will pick
                            # without advancing the rotation, so no live
                            # proxy is skipped just to build the log line.
                            live = [p for p in self.proxies if p not in self._dead_proxies]
                            next_p = live[(self._proxy_index + 1) % len(live)] if live else None
                            logger.warning("[!] Proxy Ban detected. Rotating to %s...", next_p)
                        else:
                            logger.debug("DDG returned %d on a direct connection", resp.status)
                        continue
                    text = await resp.text(errors="replace")
                    hits = []
                    for m in re.finditer(r'class="result__url"[^>]*>([^<]+)<', text):
                        raw = m.group(1).strip()
                        if raw:
                            hits.append({"url": raw if raw.startswith("http") else "https://" + raw,
                                         "title": "", "dork": query})
                    return hits[:5]
        except Exception as exc:
            logger.debug("DDG search failed via %s: %s", proxy or "direct", exc)
            if via_proxy:
                self._dead_proxies.add(proxy)
    return []
def run(self, q: str, qt: str, engines: List[str] = None) -> List[dict]:
    """Run the dork set for *q* against every engine and merge the hits.

    All (dork, engine) pairs are dispatched through a thread pool of at
    most 12 workers; each worker sleeps for a random ``Cfg.DORK_DELAY``
    jitter before querying, so the delays overlap instead of adding up.

    Args:
        q: target value substituted for ``{q}`` in every dork template.
        qt: query type; ``"email"``, ``"domain"`` and ``"name"`` select
            their own template list, anything else uses the first 20
            email dorks.
        engines: search engines to query; defaults to google, bing, ddg.

    Returns:
        Hit dicts (as produced by ``_search``, plus ``"dork"`` and
        ``"engine"``) de-duplicated by URL, or by title for hits that
        carry no URL.
    """
    if engines is None:
        engines = ["google", "bing", "ddg"]
    if qt == "email":
        dorks = self.EMAIL_DORKS
    elif qt == "domain":
        dorks = self.DOMAIN_DORKS
    elif qt == "name":
        dorks = self.NAME_DORKS
    else:
        dorks = self.EMAIL_DORKS[:20]
    dorks = dorks[:Cfg.DORK_MAX]

    from concurrent.futures import ThreadPoolExecutor, as_completed as _as_completed

    def _run_one(dork: str, eng: str) -> List[dict]:
        query = dork.replace("{q}", q)
        # Per-engine jitter — applied once per (dork, engine) pair, not per dork
        time.sleep(random.uniform(*Cfg.DORK_DELAY))
        hits = self._search(query, eng)
        for h in hits:
            h["dork"] = query
            h["engine"] = eng
        return hits

    pairs = [(dork, eng) for dork in dorks for eng in engines]
    if not pairs:
        return []

    results = []
    max_workers = min(len(pairs), 12)  # cap threads to avoid hammering search engines
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = {pool.submit(_run_one, d, e): (d, e) for d, e in pairs}
        for fut in _as_completed(futures):
            try:
                results.extend(fut.result())
            except Exception as exc:
                # Best effort: one failing pair must not abort the whole run.
                logger.debug("Dork %r failed: %s", futures[fut], exc)

    seen = set()
    unique = []
    for r in results:
        # _search always sets "url" (possibly to ""), so the title fallback
        # has to trigger on an empty URL rather than on a missing key;
        # otherwise every URL-less hit shares the key "" and only the first
        # one survives.
        key = r.get("url") or r.get("title", "")
        if key not in seen:
            seen.add(key)
            unique.append(r)
    return unique
BeautifulSoup(resp.text, "html.parser") + selectors = { + "google": ("div.g", "h3", "a[href]", ".VwiC3b"), + "bing": ("li.b_algo", "h2", "a", ".b_caption p"), + "ddg": (".result", ".result__title", ".result__url", ".result__snippet"), + } + container, title_sel, link_sel, snippet_sel = selectors.get(engine, selectors["google"]) + for item in soup.select(container)[:10]: + title_el = item.select_one(title_sel) + link_el = item.select_one(link_sel) + snip_el = item.select_one(snippet_sel) + if title_el: + url = link_el.get("href","") if link_el else "" + hits.append({ + "title": title_el.get_text().strip(), + "url": url if url.startswith("http") else "", + "snippet": snip_el.get_text().strip() if snip_el else "", + }) + except Exception: + pass + return hits + + +# ======================================================================= +# SCRAPE ENGINE — Telegram indexer + advanced dorks + regex extraction +# ======================================================================= +class ScrapeEngine: + PASTE_SITES = [ + ("Pastebin", "https://psbdmp.ws/api/v3/search/{q}", "json"), + ("IntelX", "https://2.intelx.io/intelligent/search", "intelx"), + ("Paste.ee", "https://api.paste.ee/v1/search?query={q}", "json"), + ("Rentry", "https://rentry.co/api/search?q={q}", "json"), + ("Ghostbin", "https://ghostbin.com/api/search?q={q}", "json"), + ("JustPaste", "https://justpaste.it/api/search?q={q}", "json"), + ("DPaste", "https://dpaste.org/api/search?q={q}", "json"), + ("Hastebin", "https://hastebin.com/api/search?q={q}", "json"), + ("PrivateBin", "https://privatebin.net/api/search?q={q}", "json"), + ("ControlC", "https://controlc.com/api/search?q={q}", "json"), + ("Paste2", "https://paste2.org/api/search?q={q}", "json"), + ("PastebinPro", "https://pastebin.com/api/api_search.php?q={q}", "xml"), + ] + + CRED_RE = re.compile(r"[\w.+-]+@[\w-]+\.[\w.-]+\s*[:;|]\s*\S+", re.IGNORECASE) + EMAIL_RE = re.compile(r"[\w.+-]+@[\w-]+\.[\w.]+") + HASH_RE = 
re.compile(r"\b[a-f0-9]{32,128}\b", re.IGNORECASE) + COMBO_RE = re.compile(r"^[^:]+:[^:]+$", re.MULTILINE) + + PATTERNS = [ + (re.compile(r"(?:password|passwd|pass|pwd)\s*[:=]\s*\S+", re.I), "Password"), + (re.compile(r"(?:api[_-]?(?:key|secret)|access_token|auth_token)\s*[:=]\s*['\"]?[A-Za-z0-9_\-]{16,}", re.I), "API Key/Token"), + (re.compile(r"AKIA[0-9A-Z]{16}"), "AWS Access Key"), + (re.compile(r"(?:aws_secret|secret_access_key)\s*[:=]\s*[A-Za-z0-9/+=]{40}", re.I), "AWS Secret Key"), + (re.compile(r"-----BEGIN (?:RSA|EC|OPENSSH )?PRIVATE KEY-----"), "Private Key"), + (re.compile(r"(?:mysql|postgres|mongodb|redis|mssql)://[^\s\"'<>]{8,}", re.I), "DB Connection"), + (re.compile(r"eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}"), "JWT Token"), + (re.compile(r"xox[baprs]-[0-9A-Za-z-]+"), "Slack Token"), + (re.compile(r"https://hooks\.slack\.com/services/T[A-Z0-9]+/B[A-Z0-9]+/[A-Za-z0-9]+"), "Slack Webhook"), + (re.compile(r"gh[pousr]_[A-Za-z0-9]{36}"), "GitHub Token"), + (re.compile(r"glpat-[A-Za-z0-9_-]{20,}"), "GitLab Token"), + (re.compile(r"ya29\.[A-Za-z0-9_-]+"), "Google OAuth"), + (re.compile(r"AIza[0-9A-Za-z_-]{35}"), "Google API Key"), + (re.compile(r"sk_live_[0-9a-zA-Z]{24}"), "Stripe Live Key"), + (re.compile(r"sk_test_[0-9a-zA-Z]{24}"), "Stripe Test Key"), + (re.compile(r"rk_live_[0-9a-zA-Z]{24}"), "Stripe Restricted Key"), + (re.compile(r"[MN][A-Za-z\d]{23}\.[\w-]{6}\.[\w-]{27}"), "Discord Token"), + (re.compile(r"\d{8,10}:[A-Za-z0-9_-]{35,40}"), "Telegram Bot Token"), + (re.compile(r"EAACEdEose0cBA[0-9A-Za-z]+"), "Facebook Token"), + (re.compile(r"\b[a-f0-9]{32}\b", re.I), "MD5 Hash"), + (re.compile(r"\b[a-f0-9]{40}\b", re.I), "SHA1 Hash"), + (re.compile(r"\b[a-f0-9]{64}\b", re.I), "SHA256 Hash"), + (re.compile(r"\$2[aby]\$\d{2}\$[./A-Za-z0-9]{53}"), "Bcrypt Hash"), + (re.compile(r"[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}"), "Email"), + ] + + TELEGRAM_CTI_CHANNELS = [ + "leakbase", "breachforums", "darkleaks", "combolist", 
"databreach", + "leakednews", "cybercrime", "hackersnews", "threatintel", "darkweb", + ] + + def __init__(self, session: "Session", db: "DB") -> None: + self.s = session + self.db = db + + def run(self, q: str, qt: str) -> dict: + results = {"pastes": [], "credentials": [], "hashes": [], "telegram": [], "dork_misconfigs": []} + + # Phase 1: Paste sites + import xml.etree.ElementTree as ET + for name, url, fmt in self.PASTE_SITES: + try: + if fmt == "json": + resp = self.s.get(url.replace("{q}", urllib.parse.quote(q)), timeout=12) + if resp.ok: + data = resp.json() if isinstance(resp.json(), list) else resp.json().get("data",[]) + for p in (data or [])[:Cfg.PASTE_MAX]: + pid = p.get("id","") if isinstance(p,dict) else str(p) + results["pastes"].append({"site":name,"id":pid,"data":p}) + elif fmt == "xml": + resp = self.s.get(url.replace("{q}", urllib.parse.quote(q)), timeout=12) + if resp.ok: + root = ET.fromstring(resp.text) + for item in root.findall(".//item")[:Cfg.PASTE_MAX]: + pid = item.findtext("key") or item.findtext("id") or "" + results["pastes"].append({"site":name,"id":pid,"data":item}) + elif fmt == "intelx": + key = Vault.get("INTELX_API_KEY") or self.db.get_key("intelx_api_key") + if key: + resp = self.s.post(url, json_data={"term":q,"maxresults":Cfg.PASTE_MAX,"media":0,"target":0}, extra_headers={"x-key":key}, timeout=15) + if resp.ok: + sid = resp.json().get("id") + if sid: + # Exponential backoff poll + _delay = 2 + for _attempt in range(4): + time.sleep(_delay) + res = self.s.get(f"https://2.intelx.io/intelligent/search/result?id={sid}", extra_headers={"x-key":key}, timeout=15) + if res.ok: + records_data = res.json().get("records", []) + if records_data: + for r in records_data[:Cfg.PASTE_MAX]: + results["pastes"].append({"site":"IntelX","id":r.get("systemid",""),"data":r}) + break + _delay = min(_delay * 2, 16) # cap at 16s + except Exception: + continue + + # Phase 2: Extract credentials from paste content + for paste in 
results["pastes"][:Cfg.PASTE_MAX]: + try: + content = self._fetch_content(paste) + if content: + for c in self.CRED_RE.findall(content)[:50]: + results["credentials"].append({"raw":c,"source":paste.get("site",""),"paste_id":paste.get("id","")}) + for h in self.HASH_RE.findall(content)[:20]: + results["hashes"].append({"hash":h,"source":paste.get("site",""),"paste_id":paste.get("id","")}) + for combo in self.COMBO_RE.findall(content)[:50]: + if ":" in combo: + email, pw = combo.split(":",1) + if "@" in email and len(pw) > 0: + results["credentials"].append({"raw":combo,"source":paste.get("site",""),"paste_id":paste.get("id","")}) + found_patterns: Dict[str, List] = {} + for pat, label in self.PATTERNS: + matches = pat.findall(content) + if matches: + found_patterns[label] = matches[:10] + if found_patterns: + paste["patterns"] = found_patterns + except Exception: + continue + + # Phase 3: Public Telegram Indexer + results["telegram"] = self._telegram_index(q, qt) + + # Phase 4: Advanced misconfiguration search + results["dork_misconfigs"] = self._dork_misconfigs(q, qt) + + # Phase 5: DDG search for leaked data + _ddg_queries = { + "name": [f'"{q}" password leak', f'"{q}" database dump', f'"{q}" site:pastebin.com', f'"{q}" credentials'], + "email": [f'"{q}" password leak', f'"{q}" database dump'], + "domain": [f'site:{q} password', f'"{q}" database dump'], + } + for sq in _ddg_queries.get(qt, [f'"{q}" password leak', f'"{q}" database dump']): + try: + resp = self.s.get(f"https://html.duckduckgo.com/html/?q={urllib.parse.quote(sq)}", timeout=10, use_cloudscraper=True) + if resp.ok and BeautifulSoup: + soup = BeautifulSoup(resp.text, "html.parser") + for r in soup.select(".result")[:5]: + title_el = r.select_one(".result__title") + if title_el: + results["pastes"].append({"site":"DDG","title":title_el.get_text().strip(),"query":sq}) + except Exception: + continue + + return results + + def _telegram_index(self, q: str, qt: str) -> List[dict]: + """ + Parse public 
Telegram web-gateway previews to index public CTI + telemetry and threat actor communications. + """ + hits = [] + targets = [q] if qt in ("username", "domain", "name") else [] + targets += self.TELEGRAM_CTI_CHANNELS + for channel in targets: + try: + resp = self.s.get(f"https://t.me/s/{urllib.parse.quote(channel)}", timeout=10, use_cloudscraper=True) + if not resp.ok or not BeautifulSoup: + continue + soup = BeautifulSoup(resp.text, "html.parser") + msgs = soup.select(".tgme_widget_message_text") + for msg in msgs[:20]: + text = msg.get_text(separator=" ").strip() + if not text: + continue + # Check if query appears in message + if q.lower() in text.lower() or qt == "username": + found_patterns: Dict[str, List] = {} + for pat, label in self.PATTERNS: + matches = pat.findall(text) + if matches: + found_patterns[label] = matches[:5] + hits.append({ + "channel": channel, + "text": text[:500], + "patterns": found_patterns, + "contains_target": q.lower() in text.lower(), + }) + except Exception: + continue + return hits + + def _dork_misconfigs(self, q: str, qt: str) -> List[dict]: + """ + Automate search queries for exposed public misconfigurations + (index of, .env, sql_dump files) associated with the target domain. 
+ """ + hits = [] + if qt not in ("domain", "email", "name"): + return hits + if qt == "name": + dorks = [ + f'"{q}" filetype:pdf', f'"{q}" filetype:xlsx', + f'"{q}" site:pastebin.com', f'"{q}" intext:"password"', + f'"{q}" "database dump"', f'"{q}" site:github.com', + ] + else: + target = q if qt == "domain" else q.split("@")[1] if "@" in q else q + dorks = [ + f'site:{target} intitle:"index of"', + f'site:{target} intitle:"index of" ".env"', + f'site:{target} intitle:"index of" "sql_dump"', + f'site:{target} intitle:"index of" "backup"', + f'site:{target} ext:env', + f'site:{target} ext:sql', + f'"{target}" filetype:env', + f'"{target}" filetype:sql "sql_dump"', + ] + for dork in dorks: + try: + resp = self.s.get(f"https://html.duckduckgo.com/html/?q={urllib.parse.quote(dork)}", timeout=10, use_cloudscraper=True) + if resp.ok and BeautifulSoup: + soup = BeautifulSoup(resp.text, "html.parser") + for r in soup.select(".result")[:5]: + title_el = r.select_one(".result__title") + url_el = r.select_one(".result__url") + if title_el: + hits.append({ + "dork": dork, + "title": title_el.get_text().strip(), + "url": url_el.get_text().strip() if url_el else "", + }) + time.sleep(random.uniform(2.0, 4.0)) + except Exception: + continue + return hits + + def _fetch_content(self, paste: dict) -> str: + try: + site = paste.get("site","") + pid = paste.get("id","") + data = paste.get("data",{}) + if not pid: + return "" + raw_urls = { + "Pastebin": f"https://psbdmp.ws/api/v3/dump/{pid}", + "Rentry": f"https://rentry.co/api/raw/{pid}", + "Hastebin": f"https://hastebin.com/raw/{pid}", + "DPaste": f"https://dpaste.org/{pid}/raw/", + "Ghostbin": f"https://ghostbin.com/paste/{pid}/raw", + "JustPaste": f"https://justpaste.it/{pid}", + "PrivateBin": f"https://privatebin.net/?{pid}", + "ControlC": f"https://controlc.com/{pid}", + "Paste2": f"https://paste2.org/raw/{pid}", + "PastebinPro":f"https://pastebin.com/raw/{pid}", + } + if site == "IntelX": + key = self.db.get_key("intelx") + 
class HashEngine:
    """Identify hash formats and attempt to recover plaintexts.

    Recovery tries, in order: the local cache, a dictionary attack with
    mutations, public lookup services, Hashmob, and a second small wordlist.
    """

    TYPES = [
        ("MD5",           re.compile(r"^[a-f0-9]{32}$", re.I),   "md5"),
        ("SHA1",          re.compile(r"^[a-f0-9]{40}$", re.I),   "sha1"),
        ("SHA224",        re.compile(r"^[a-f0-9]{56}$", re.I),   "sha224"),
        ("SHA256",        re.compile(r"^[a-f0-9]{64}$", re.I),   "sha256"),
        ("SHA384",        re.compile(r"^[a-f0-9]{96}$", re.I),   "sha384"),
        ("SHA512",        re.compile(r"^[a-f0-9]{128}$", re.I),  "sha512"),
        ("NTLM",          re.compile(r"^[a-f0-9]{32}$", re.I),   "ntlm"),
        ("MySQL",         re.compile(r"^\*[A-F0-9]{40}$"),       "mysql"),
        ("bcrypt",        re.compile(r"^\$2[aby]?\$\d{2}\$"),    "bcrypt"),
        ("Argon2",        re.compile(r"^\$argon2"),              "argon2"),
        ("SHA512Crypt",   re.compile(r"^\$6\$"),                 "sha512crypt"),
        ("SHA256Crypt",   re.compile(r"^\$5\$"),                 "sha256crypt"),
        ("MD5Crypt",      re.compile(r"^\$1\$"),                 "md5crypt"),
        ("WordPress",     re.compile(r"^\$P\$"),                 "wordpress"),
        ("phpBB",         re.compile(r"^\$H\$"),                 "phpbb"),
        ("Drupal",        re.compile(r"^\$S\$"),                 "drupal"),
        ("Django-SHA256", re.compile(r"^pbkdf2_sha256\$"),       "django"),
        ("LM",            re.compile(r"^[a-f0-9]{32}$", re.I),   "lm"),
        ("CRC32",         re.compile(r"^[a-f0-9]{8}$", re.I),    "crc32"),
    ]

    COMMON_PASS = [
        "password","123456","12345678","qwerty","abc123","monkey","1234567","letmein",
        "trustno1","dragon","baseball","iloveyou","master","sunshine","ashley","bailey",
        "shadow","123123","654321","superman","qazwsx","michael","football","password1",
        "password123","admin","admin123","root","toor","test","guest","welcome","login",
        "pass","pass123","1234","12345","123456789","1234567890","0987654321","111111",
        "666666","888888","000000","P@ssw0rd","P@ss1234","Welcome1","Ch@ngeme","Qwerty123",
        "Summer2024","Winter2025","Spring2024","Fall2024","Password123!","Admin@123",
        "Root@123","Qwerty@123","1qaz2wsx","1qaz@WSX","q1w2e3r4","Password1!",
        "Admin123!","Welcome@2025","Changeme123","P@ssword2025","Secure@123",
    ]

    LEET_MAP = {"a":"@4","e":"3","i":"1!","o":"0","s":"$5","t":"7","l":"1","g":"9","b":"8"}

    # Second-pass wordlist used by _extended.
    _EXTENDED_WORDS = [
        "password!","admin!","root123","test1234","welcome1","changeme","P@ssword1",
        "Passw0rd!","S3cure!","l3tm3in","p4ssw0rd","Summer2024","Winter2025",
    ]

    # TYPES tags that are also hashlib algorithm names and can therefore be
    # recomputed locally; every other tag is skipped by the wordlist attacks.
    _HASHLIB_TAGS = ("md5", "sha1", "sha224", "sha256", "sha384", "sha512")

    def __init__(self, db: "DB", session: "Session" = None) -> None:
        self.db = db
        self._session = session

    def identify(self, h: str) -> List[Tuple[str, str]]:
        """Return the ``(name, tag)`` pairs whose pattern matches ``h``.

        32-character hex strings match MD5, NTLM and LM alike; only MD5 is
        reported for them because the other two cannot be recomputed with
        hashlib.  Returns ``[("Unknown", "unknown")]`` when nothing matches.
        """
        matches = [(name, tag) for name, pat, tag in self.TYPES if pat.match(h)]
        if any(tag == "md5" for _, tag in matches):
            matches = [(n, t) for n, t in matches if t not in ("ntlm", "lm")]
        return matches or [("Unknown", "unknown")]

    def crack(self, h: str) -> dict:
        """Try to recover the plaintext of ``h``.

        Returns:
            Dict with "hash", "plaintext" (``None`` if not recovered),
            "method" (name of the stage that succeeded, or ``None``) and
            "types" (result of :meth:`identify`).
        """
        types = self.identify(h)
        cached = self.db.get_plain(h)
        if cached:
            return {"hash": h, "plaintext": cached, "method": "Cache", "types": types}
        result = {"hash": h, "plaintext": None, "method": None, "types": types}
        stages = [
            (lambda: self._dict_attack(h, types), "Dictionary+Mutations"),
            (lambda: self._online(h), "Online Rainbow"),
            (lambda: self._hashmob(h), "Hashmob Community"),
            (lambda: self._extended(h), "Extended Mutations"),
        ]
        for attempt, method in stages:
            plain = attempt()
            if plain:
                result["plaintext"] = plain
                result["method"] = method
                self._cache(h, plain, method)
                break
        return result

    def _wordlist_attack(self, h: str, words: List[str], types: list) -> Optional[str]:
        """Hash every mutation of every word and return the first that equals ``h``."""
        algos = [tag for _, tag in types if tag in self._HASHLIB_TAGS]
        if not algos:
            # Salted/iterated formats (bcrypt, crypt, ...) cannot be matched here.
            return None
        wanted = h.lower()
        for word in words:
            for candidate in self._mutate(word):
                raw = candidate.encode()
                for algo in algos:
                    try:
                        if hashlib.new(algo, raw).hexdigest() == wanted:
                            return candidate
                    except ValueError:
                        # Algorithm disabled in this build (e.g. md5 under FIPS).
                        continue
        return None

    def _dict_attack(self, h: str, types: list) -> Optional[str]:
        """Dictionary attack over ``COMMON_PASS`` for the given candidate types."""
        return self._wordlist_attack(h, self.COMMON_PASS, types)

    def _mutate(self, word: str) -> List[str]:
        """Return the unique case/suffix/reverse/leet variants of ``word``.

        Order is deterministic (first occurrence wins).
        """
        cap = word.capitalize()
        mutations = [word, word.upper(), word.lower(), cap,
                     word+"!", word+"1", word+"123", word+"@", word+"#",
                     word+"2024", word+"2025", word[::-1], word+word,
                     cap+"!", cap+"1",
                     word+"!@#", word+"123!", word+"123@", word+"123#"]
        lowered = word.lower()
        for char, replacements in self.LEET_MAP.items():
            # One substitution (first occurrence only) per replacement glyph.
            mutations.extend(lowered.replace(char, glyph, 1) for glyph in replacements)
        return list(dict.fromkeys(mutations))

    def _online(self, h: str) -> Optional[str]:
        """Query public lookup services and return the first plaintext found."""
        quoted = urllib.parse.quote(h, safe="")
        apis = [
            (f"https://www.nitrxgen.net/md5db/{quoted}", "text"),
            (f"https://hashes.org/api.php?key=&query={quoted}", "json"),
            (f"https://hash.help/api/lookup/{quoted}", "json"),
            (f"https://hashkiller.io/api/search.php?hash={quoted}", "json"),
        ]
        not_found_prefixes = ("not found", "error", "invalid", "no result", "not in",
                              "cmd5-error", "not exist", "code erreur", "erreur", "unknown")
        _get = self._session.get if self._session else (lambda url, **kw: Session._null_response(url))
        for url, fmt in apis:
            try:
                resp = _get(url, timeout=8)
                if not resp.ok:
                    continue
                if fmt == "text":
                    text = resp.text.strip()
                    if not text or len(text) >= 100:
                        continue
                    if text.lower().startswith(not_found_prefixes):
                        continue
                    return text
                data = resp.json()
                if isinstance(data, dict):
                    # Prefer "result", but fall back to "plaintext" when
                    # "result" is present yet empty/None.
                    plain = data.get("result") or data.get("plaintext")
                    if plain:
                        return plain
            except Exception as exc:
                logger.debug("online hash lookup failed (%s): %s", url, exc)
        return None

    def _hashmob(self, h: str) -> Optional[str]:
        """Look ``h`` up through the Hashmob search endpoint (needs a session)."""
        if not self._session:
            return None
        try:
            resp = self._session.post("https://hashmob.net/api/v2/search", json_data={"hash": h}, timeout=10)
            if resp.ok:
                data = resp.json()
                if data.get("found") and data.get("result"):
                    return data["result"]
        except Exception as exc:
            logger.debug("hashmob lookup failed: %s", exc)
        return None

    def _extended(self, h: str) -> Optional[str]:
        """Second-pass dictionary attack over ``_EXTENDED_WORDS``."""
        return self._wordlist_attack(h, self._EXTENDED_WORDS, self.identify(h))

    def _cache(self, h: str, p: str, m: str) -> None:
        """Store a recovered plaintext; a cache failure never fails the crack."""
        try:
            self.db.store_hash(h, "", p, m)
        except Exception as exc:
            logger.debug("hash cache store failed: %s", exc)
dict: + length = len(password) + charsets = 0; charset_names = [] + if re.search(r"[a-z]", password): charsets += 26; charset_names.append("lowercase") + if re.search(r"[A-Z]", password): charsets += 26; charset_names.append("uppercase") + if re.search(r"[0-9]", password): charsets += 10; charset_names.append("digits") + if re.search(r"[^a-zA-Z0-9]", password): charsets += 33; charset_names.append("symbols") + entropy = length * math.log2(max(charsets, 1)) if charsets else 0 + patterns = []; penalties = 0 + if password.lower() in self._get_common(): + patterns.append("Common password (top 10K)"); penalties += 40 + for walk in self.KEYBOARD_WALKS: + if walk in password.lower(): + patterns.append(f"Keyboard walk: {walk}"); penalties += 15; break + for pat in self.DATE_PATS: + if pat.search(password): + patterns.append("Date pattern detected"); penalties += 10; break + if re.search(r"(.)\1{2,}", password): + patterns.append("Repeated characters"); penalties += 10 + deleet = password + for leet, orig in self.LEET_REV.items(): + deleet = deleet.replace(leet, orig[0]) + if deleet.lower() != password.lower() and deleet.lower() in self._get_common(): + patterns.append(f"Leet speak of common password: {deleet.lower()}"); penalties += 30 + raw_score = min(100, int(entropy * 1.5)) + final_score = max(0, raw_score - penalties) + speeds = [("Online (10/s)",10),("Throttled (1K/s)",1000),("Offline fast (1B/s)",1_000_000_000),("GPU cluster (100B/s)",100_000_000_000)] + crack_times = {} + for label, speed in speeds: + # Use logarithms to avoid OverflowError on very long passwords + if charsets <= 1 or length == 0: + secs = 0.0 + else: + log_secs = length * math.log10(max(charsets, 1)) - math.log10(speed) + secs = 0.0 if log_secs < 0 else (float('inf') if log_secs > 300 else 10 ** log_secs) + if secs == 0.0 or secs < 1: crack_times[label] = "Instant" + elif math.isinf(secs): crack_times[label] = "> 10^300 years" + elif secs < 60: crack_times[label] = f"{secs:.0f} seconds" + elif 
secs < 3600: crack_times[label] = f"{secs/60:.0f} minutes" + elif secs < 86400: crack_times[label] = f"{secs/3600:.0f} hours" + elif secs < 86400*365: crack_times[label] = f"{secs/86400:.0f} days" + elif secs < 86400*365*1000: crack_times[label] = f"{secs/(86400*365):.0f} years" + else: crack_times[label] = f"{secs/(86400*365):.2e} years" + if final_score >= 80: strength = "VERY STRONG" + elif final_score >= 60: strength = "STRONG" + elif final_score >= 40: strength = "MODERATE" + elif final_score >= 20: strength = "WEAK" + else: strength = "VERY WEAK" + return {"password":password,"length":length,"entropy":round(entropy,2),"charsets":charset_names,"charset_size":charsets,"patterns":patterns,"penalties":penalties,"score":final_score,"raw_score":raw_score,"strength":strength,"crack_times":crack_times} + + +# ======================================================================= +# CREDENTIAL ANALYZER — Temporal Correlation & Deduplication +# ======================================================================= +class CredAnalyzer: + @staticmethod + def analyze(records: list) -> dict: + if not records: + return {} + emails: Dict[str,int] = {}; passwords: Dict[str,int] = {}; domains: Dict[str,int] = {} + timeline = []; stealer_logs = [] + total_crit = total_high = total_med = 0 + dedup_seen: Set[str] = set() + unique_records = [] + + for r in records: + dk = r.dedup_key() if hasattr(r, "dedup_key") else "" + if dk and dk in dedup_seen: + continue + if dk: + dedup_seen.add(dk) + unique_records.append(r) + + em = _rec_get(r, "email") + pw = _rec_get(r, "password") + dom = _rec_get(r, "domain") + sev = _rec_get(r, "severity") or Severity.INFO + if em: emails[em] = emails.get(em, 0) + 1 + if pw: passwords[pw] = passwords.get(pw, 0) + 1 + if dom: domains[dom] = domains.get(dom, 0) + 1 + bd = _rec_get(r, "breach_date") + if bd: + timeline.append({"date":bd,"breach":_rec_get(r,"breach_name"),"severity":sev.name if isinstance(sev,Severity) else str(sev)}) + if any(x in 
class PivotManager:
    """
    Builds identity graphs by automatically triggering sub-queries on
    high-confidence pivot candidates (usernames, secondary emails, phones)
    up to a configurable depth, with a strict seen-targets set to prevent
    infinite loops.
    """

    def __init__(self, orchestrator: "Orchestrator", max_depth: int = None) -> None:
        """
        Args:
            orchestrator: Object whose ``scan(candidate, qtype)`` runs a sub-query.
            max_depth: Maximum pivot depth; ``None`` uses ``Cfg.PIVOT_DEPTH``.
                An explicit ``0`` disables pivoting.
        """
        self._orc = orchestrator
        # `is None` rather than `or`, so an explicit 0 is not replaced by the default.
        self._max_depth = Cfg.PIVOT_DEPTH if max_depth is None else max_depth
        self._seen: Set[str] = set()

    def enrich(self, seed_records: List["Record"], seed_target: str) -> List["Record"]:
        """
        Given an initial set of records, extract pivot candidates and
        recursively scan them, returning all discovered records
        (the seed records first, then everything found while pivoting).
        """
        self._seen.add(seed_target.lower())
        all_records = list(seed_records)
        self._pivot(seed_records, depth=1, all_records=all_records)
        return all_records

    def _pivot(self, records: List["Record"], depth: int, all_records: List["Record"]) -> None:
        """Scan each unseen candidate from ``records`` and recurse on the results.

        New records are appended to ``all_records`` in place.  Errors raised by
        a sub-scan are logged and do not stop the remaining candidates.
        """
        if depth > self._max_depth:
            return
        # Only pivot on records with sufficient source confidence; if none
        # qualify, fall back to the full record list.
        confident = [r for r in records if getattr(r, "source_confidence", 1.0) >= Cfg.PIVOT_CONFIDENCE]
        candidates = self._extract_candidates(confident or records)
        for candidate, qtype in candidates:
            key = candidate.lower()
            if key in self._seen:
                continue
            self._seen.add(key)
            out("pivot", f"  [Depth {depth}] Pivoting on {qtype}: {candidate}")
            try:
                new_records = self._orc.scan(candidate, qtype)
                if new_records:
                    all_records.extend(new_records)
                    self._pivot(new_records, depth + 1, all_records)
            except Exception as exc:
                logger.debug("Pivot error %s: %s", candidate, exc)

    @staticmethod
    def _extract_candidates(records: List["Record"]) -> List[Tuple[str, str]]:
        """Collect up to 30 unique ``(value, query_type)`` pivot candidates.

        Values are de-duplicated case-insensitively across all fields and must
        be strings longer than three characters.
        """
        fields = (
            ("email", "email"),
            ("username", "username"),
            ("phone", "phone"),
            ("full_name", "name"),
            ("name", "name"),
        )
        candidates: List[Tuple[str, str]] = []
        seen_vals: Set[str] = set()
        for r in records:
            for field, qtype in fields:
                val = _rec_get(r, field)
                # Non-string values (e.g. a numeric phone) cannot be lower-cased
                # or length-checked; skip them instead of raising.
                if not isinstance(val, str) or len(val) <= 3:
                    continue
                lowered = val.lower()
                if lowered in seen_vals:
                    continue
                seen_vals.add(lowered)
                candidates.append((val, qtype))
        return candidates[:30]
class Orchestrator:
    """Wires the session, engines and source plugins together and exposes the scan API."""

    def __init__(self, config: NoxConfig = None, db: NoxDB = None) -> None:
        """Create the shared session and every engine used by the scan pipeline.

        Args:
            config: Runtime configuration; a default ``NoxConfig()`` is built
                when omitted.
            db: Persistence handle; a default ``NoxDB()`` is built when omitted.
        """
        self.config = config or NoxConfig()
        self.db = db or NoxDB()
        self.session = Session(self.config)
        self.hash_engine = HashEngine(self.db, self.session)
        self.pass_analyzer = PassAnalyzer()
        self.dork_engine = DorkEngine(self.session)
        self.scrape_engine = ScrapeEngine(self.session, self.db)
        self.intel_db = DatabaseManager()
        self.dorking_engine = DorkingEngine(self.config.concurrency, self.db, self.config)
        self._json_sources: List["JSONSourceLoader"] = []
        # Created lazily on the first _async_scan() call.
        self._source_orchestrator: Optional["SourceOrchestrator"] = None

    def _get_semaphore(self) -> asyncio.Semaphore:
        """Return a new semaphore sized to ``config.concurrency``."""
        # Always create a fresh semaphore so it belongs to the current loop.
        return asyncio.Semaphore(self.config.concurrency)

    # ── Async core scan ───────────────────────────────────────────────

    async def _async_scan(self, target: str, query_type: str) -> List[Record]:
        """
        Query every source applicable to ``query_type`` concurrently.

        With aiohttp available each source runs as an asyncio task sharing
        one ``ClientSession``; otherwise the sources' synchronous ``search``
        methods run in a thread pool.

        Returns:
            The concatenated records of all sources. Sources that fail
            contribute nothing.
        """
        # ── Fail-Safe Proxy check (transport-level, before any connection) ──
        ProxyManager.fail_safe_check(self.config, allow_leak=self.config.allow_leak)

        # B1: the SourceOrchestrator is built once and then reused; on every
        # later call only its semaphore is replaced and pushed to all loaded
        # source instances.
        if self._source_orchestrator is None:
            self._source_orchestrator = SourceOrchestrator(
                self._get_semaphore(), self.db, self.config
            )
            self._source_orchestrator._ensure_loaded()
        else:
            new_sem = self._get_semaphore()
            self._source_orchestrator._sem = new_sem
            for src in (self._source_orchestrator._nox_sources
                        + self._source_orchestrator._fs_providers
                        + self._source_orchestrator._py_providers):
                src._sem_obj = new_sem
        sources = self._source_orchestrator.get_sources(self.session, query_type)

        out("info", f"Active sources: {len(sources)} / {self._source_orchestrator.plugin_count()} (filtered for input type: {query_type})")

        if not aiohttp_mod:
            # Fallback: synchronous thread pool
            from concurrent.futures import ThreadPoolExecutor, as_completed
            records = []
            with ThreadPoolExecutor(max_workers=self.config.concurrency) as executor:
                futures = {executor.submit(src.search, target, query_type): src for src in sources}
                for i, future in enumerate(as_completed(futures), 1):
                    src = futures[future]
                    try:
                        recs = future.result(timeout=self.config.timeout + 5)
                        if recs:
                            records.extend(recs)
                            out("ok", f" [{i}/{len(sources)}] {src.name}: {len(recs)} results")
                        else:
                            out("dim", f" [{i}/{len(sources)}] {src.name}: 0 results")
                    except Exception as exc:
                        out("dim", f" [{i}/{len(sources)}] {src.name}: error - {str(exc)[:50]}")
            return records

        # B5: SOCKS5 proxies are not supported via trust_env — use ProxyConnector directly.
        # Exactly one connector is built: creating a TCPConnector first and
        # then overwriting it would leave an unclosed connector behind.
        connector = None
        if self.config.proxy and self.config.proxy.startswith("socks5"):
            try:
                from aiohttp_socks import ProxyConnector as _ProxyConnector  # type: ignore
                connector = _ProxyConnector.from_url(self.config.proxy, ssl=_SSL_CTX, limit=self.config.concurrency)
            except ImportError:
                logger.warning("aiohttp_socks not installed — SOCKS5 proxy bypassed. Install: pip install aiohttp-socks")
        _socks5_connector = connector is not None
        if connector is None:
            # family=0 → AF_UNSPEC (IPv4+IPv6)
            connector = aiohttp_mod.TCPConnector(ssl=_SSL_CTX, limit=self.config.concurrency, family=0)

        # B2: remember whether THIS call exported the proxy variables so that
        # only this call removes them again. The module-level lock keeps
        # concurrent scans from racing on the environment.
        _proxy_env_set = False
        if self.config.proxy and not _socks5_connector and not os.environ.get("HTTPS_PROXY"):
            with _PROXY_ENV_LOCK:
                if not os.environ.get("HTTPS_PROXY"):
                    os.environ["HTTPS_PROXY"] = self.config.proxy
                    os.environ["HTTP_PROXY"] = self.config.proxy
                    _proxy_env_set = True
        session_kwargs: dict = {"trust_env": True} if (self.config.proxy and not _socks5_connector) else {}
        # Per-source semaphores — fresh each call, bound to the current running loop.
        _source_sems: Dict[str, asyncio.Semaphore] = {}
        try:
            async with aiohttp_mod.ClientSession(connector=connector, **session_kwargs) as session:
                _counter = [0]
                # Breach sources only — DorkingEngine is dispatched separately in fullscan/autoscan.
                tasks = [
                    asyncio.create_task(self._run_source(session, src, target, query_type, _counter, len(sources), _source_sems))
                    for src in sources
                ]
                results = await asyncio.gather(*tasks, return_exceptions=True)
        finally:
            if _proxy_env_set:
                os.environ.pop("HTTPS_PROXY", None)
                os.environ.pop("HTTP_PROXY", None)

        records = []
        for r in results:
            if isinstance(r, list):
                records.extend(r)
            elif isinstance(r, BaseException):
                # Leave a trace instead of dropping the failure silently.
                logger.debug("Source task failed: %s", r)
        return records
+ tasks = [ + asyncio.create_task(self._run_source(session, src, target, query_type, _counter, len(sources), _source_sems)) + for src in sources + ] + results = await asyncio.gather(*tasks, return_exceptions=True) + finally: + if _proxy_env_set: + os.environ.pop("HTTPS_PROXY", None) + os.environ.pop("HTTP_PROXY", None) + + records = [] + for r in results: + if isinstance(r, list): + records.extend(r) + return records + + async def _run_source(self, session, src, target: str, qtype: str, counter: list, total: int, source_sems: dict = None) -> List[Record]: + # Per-source semaphore: max 3 concurrent requests per source + if source_sems is None: + source_sems = {} + src_name = getattr(src, "name", "unknown") + if src_name not in source_sems: + source_sems[src_name] = asyncio.Semaphore(3) + try: + async with source_sems[src_name]: + recs = await src.async_search(session, target, qtype) + counter[0] += 1 + idx = counter[0] + if recs: + out("ok", f" [{idx}/{total}] {src.name}: {len(recs)} results") + else: + out("dim", f" [{idx}/{total}] {src.name}: 0 results") + return recs or [] + except Exception as exc: + counter[0] += 1 + idx = counter[0] + out("dim", f" [{idx}/{total}] {src.name}: error - {str(exc)[:50]}") + return [] + + # ── Public scan API ─────────────────────────────────────────────── + + def scan(self, target: str, query_type: str = None) -> List[Record]: + if not query_type: + query_type = Detect.qtype(target) + out("info", f"Scanning: {target} (type: {query_type})") + try: + loop = asyncio.get_running_loop() + except RuntimeError: + loop = None + try: + if loop and loop.is_running(): + import concurrent.futures + with concurrent.futures.ThreadPoolExecutor(max_workers=1) as ex: + records = ex.submit( + asyncio.run, self._full_async_scan(target, query_type) + ).result(timeout=300) + else: + records = asyncio.run(self._full_async_scan(target, query_type)) + except Exception: + records = [] + return records + + async def _full_async_scan(self, target: str, 
query_type: str) -> List[Record]: + """Async pipeline: cache-check → network scan → score → persist → dehash → reputation.""" + # Cache check + try: + cached = await self.intel_db.get_cached(target) + if cached: + out("ok", f"Cache hit: {len(cached)} records (< 24 h old)") + return self._hydrate_cache(cached) + except Exception as exc: + logger.debug("Cache check failed: %s", exc) + + records = await self._async_scan(target, query_type) + out("ok", f"\nScan complete: {len(records)} records") + + records = [RiskEngine.score(r) for r in records] + records = RiskEngine.apply_persistence(records) + HVTAnalyzer.annotate(records) + + # Vault AutoDehash hook — run in executor to avoid blocking the event loop + loop = asyncio.get_running_loop() + records = await loop.run_in_executor(None, Vault.autodehash, records, self.db) + + # DeHash & Reputation enrichment — run concurrently (best-effort, non-blocking) + if aiohttp_mod: + connector = aiohttp_mod.TCPConnector(ssl=_SSL_CTX, limit=5) + async with aiohttp_mod.ClientSession(connector=connector) as enrich_session: + dehash_eng = DeHashEngine(self.db, self.config) + rep_eng = ReputationEngine(self.config) + _dehash_res, rep_result = await asyncio.gather( + dehash_eng.dehash_records(enrich_session, records), + rep_eng.check(enrich_session, target, query_type), + return_exceptions=True, + ) + if isinstance(_dehash_res, list): + records = _dehash_res + if isinstance(rep_result, dict) and rep_result: + out("info", f"VirusTotal: {rep_result['malicious']} malicious, " + f"{rep_result['suspicious']} suspicious detections for {target}") + + try: + await self.intel_db.cache_records(target, query_type, records) + except Exception as exc: + logger.debug("DB persist failed: %s", exc) + + return records + + async def fullscan(self, target: str, pivot: bool = True): + """Full autoscan: Recursive Avalanche Engine — breach + dork + scrape on every discovered asset.""" + out("info", f"[*] Avalanche scan starting: {target}") + _t0 = 
time.time() + + if _HAS_AVALANCHE and pivot: + engine = AvalancheScanner(self) + all_records, dork_results, scrape_results = await engine.run(target) + pivot_chain = [target] + [a for a in engine.seen_assets if a != target.lower()] + pivot_depth = engine.get_max_depth() + pivot_log = engine.pivot_log + discovered_assets = engine.get_discovered_assets() + else: + all_records = await self._full_async_scan(target, Detect.qtype(target)) + loop = asyncio.get_running_loop() + dork_results, scrape_results = await asyncio.gather( + self.async_dork(target), + loop.run_in_executor(None, self.scrape, target), + return_exceptions=True, + ) + if isinstance(dork_results, Exception): dork_results = [] + if isinstance(scrape_results, Exception): scrape_results = {} + pivot_chain = [target] + pivot_depth = 0 + pivot_log = [] + discovered_assets = [] + + # ── Enrich scraped results into records ─────────────────────── + for cred in scrape_results.get("credentials", []): + raw = cred.get("raw", "") + if ":" in raw: + parts = raw.split(":", 1) + em, pw = parts[0].strip(), parts[1].strip() + r = Record(source=cred.get("source", "ScrapeEngine"), + email=em if "@" in em else "", + username=em if "@" not in em else "", + password=pw, + breach_name=cred.get("paste_id", ""), + data_types=["Scraped", "Credentials"]) + else: + r = Record(source=cred.get("source", "ScrapeEngine"), + raw_data=cred, + breach_name=cred.get("paste_id", ""), + data_types=["Scraped"]) + r = RiskEngine.score(r) + all_records.append(r) + + for paste in scrape_results.get("pastes", []): + r = Record(source=paste.get("source", "PasteScraper"), + breach_name=paste.get("id", ""), + raw_data=paste, + data_types=["Paste"]) + r = RiskEngine.score(r) + all_records.append(r) + + for tg in scrape_results.get("telegram", []): + r = Record(source=f"Telegram/{tg.get('channel', 'unknown')}", + raw_data=tg, + data_types=["Telegram"]) + r = RiskEngine.score(r) + all_records.append(r) + + for mc in 
scrape_results.get("dork_misconfigs", []): + r = Record(source="MisconfigScraper", + domain=mc.get("url", ""), + raw_data=mc, + data_types=["Misconfiguration"]) + r = RiskEngine.score(r) + all_records.append(r) + + analysis = CredAnalyzer.analyze(all_records) + HVTAnalyzer.annotate(all_records) # set is_hvt field on every record + hvt_records = HVTAnalyzer.filter_hvt(all_records) + + return { + "target": target, + "records": all_records, + "analysis": analysis, + "hvt_records": hvt_records, + "dork_results": dork_results, + "scrape_results": scrape_results, + "pivot_chain": pivot_chain, + "pivot_log": pivot_log, + "discovered_assets": discovered_assets, + "scan_meta": { + "elapsed_seconds": round(time.time() - _t0, 1), + "pivot_depth": pivot_depth, + "nodes_discovered": len({ + v.lower() for r in all_records + for v in [ + _rec_get(r, "email"), _rec_get(r, "username"), + _rec_get(r, "ip_address"), _rec_get(r, "phone"), _rec_get(r, "domain"), + ] if v + }), + }, + } + + def crack(self, hash_value: str) -> dict: + return self.hash_engine.crack(hash_value) + + def analyze_pass(self, password: str) -> dict: + return self.pass_analyzer.analyze(password) + + def dork(self, target: str, query_type: str = None) -> List[dict]: + if not query_type: + query_type = Detect.qtype(target) + return self.dork_engine.run(target, query_type) + + async def async_dork(self, target: str, session=None) -> List[dict]: + """Native async dork dispatch via DorkingEngine.""" + try: + import aiohttp as _aio # type: ignore + if session is None: + connector = _aio.TCPConnector(limit=10, ssl=_SSL_CTX, family=0) + async with _aio.ClientSession(connector=connector) as _s: + records = await self.dorking_engine.async_search(_s, target, Detect.qtype(target)) + else: + records = await self.dorking_engine.async_search(session, target, Detect.qtype(target)) + return [ + { + "url": r.raw_data.get("url", "") if hasattr(r, "raw_data") else "", + "title": r.raw_data.get("url", r.raw_data.get("dork", "")) if 
hasattr(r, "raw_data") else "", + "snippet": "", + "dork": r.raw_data.get("dork", "") if hasattr(r, "raw_data") else "", + "engine": "DDG", + } + for r in records + ] + except Exception as exc: + logger.debug("async_dork %s: %s", target, exc) + return [] + + def scrape(self, target: str, query_type: str = None) -> dict: + if not query_type: + query_type = Detect.qtype(target) + return self.scrape_engine.run(target, query_type) + + @staticmethod + def _hydrate_cache(cached: List[dict]) -> List[Record]: + records = [] + for d in cached: + try: + dt = d.get("data_types","[]") + if isinstance(dt, str): + try: dt = json.loads(dt) + except Exception: dt = [] + rs = float(d.get("risk_score", 0.0)) + if rs >= 90: sev = Severity.CRITICAL + elif rs >= 70: sev = Severity.HIGH + elif rs >= 40: sev = Severity.MEDIUM + elif rs >= 10: sev = Severity.LOW + else: sev = Severity.INFO + records.append(Record( + source=d.get("source",""), email=d.get("email",""), + username=d.get("username",""), password=d.get("password",""), + password_hash=d.get("password_hash",""), hash_type=d.get("hash_type",""), + phone=d.get("phone",""), breach_name=d.get("breach_name",""), + breach_date=d.get("breach_date",""), data_types=dt, severity=sev, + risk_score=rs, source_confidence=float(d.get("source_conf",0.5)), + is_hvt=bool(d.get("is_hvt",0)), + )) + except Exception: + continue + return records + + +# ======================================================================= +# ADVANCED REPORTER +# ======================================================================= +class AdvancedReporter: + # Control characters and binary garbage that break PDF/terminal rendering + _CTRL_RE = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x9f]") + + @staticmethod + def sanitize_payload(value: Any) -> str: + """ + Central sanitization for all user-supplied / breach-sourced strings. + + 1. Coerce to str. + 2. Strip control characters and binary garbage (safe for PDF/terminal). + 3. 
class AdvancedReporter:
    """Builds HTML and Markdown reports from scan results."""

    # Control characters and binary garbage that break PDF/terminal rendering
    _CTRL_RE = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x9f]")

    @staticmethod
    def sanitize_payload(value: Any) -> str:
        """
        Central sanitization for all user-supplied / breach-sourced strings.

        1. Coerce to str (``None`` becomes the empty string).
        2. Strip control characters and binary garbage (safe for PDF/terminal).
        3. HTML-escape the result (safe for HTML embedding — prevents XSS).

        Example: '<script>alert(1)</script>' → '&lt;script&gt;alert(1)&lt;/script&gt;'
        """
        s = str(value) if value is not None else ""
        s = AdvancedReporter._CTRL_RE.sub("", s)
        return html_module.escape(s)

    @staticmethod
    def _raw(value: Any) -> str:
        """Strip control chars only — no HTML escaping (for PDF/CSV/plain-text paths).

        Tab, line feed and carriage return are NOT removed by ``_CTRL_RE``.
        """
        s = str(value) if value is not None else ""
        return AdvancedReporter._CTRL_RE.sub("", s)

    @staticmethod
    def _build_summary(records: list) -> dict:
        """Aggregate the figures shown in a report's executive summary.

        Returns:
            A dict with the keys ``total_identities``, ``total_records``,
            ``hvt_list`` (at most 30 unique identities), ``hvt_count``,
            ``stealer_count``, ``buckets`` (record count per risk level),
            ``pw_patterns`` (up to 8 ``(label, count)`` pairs, most frequent
            first) and ``top_threats`` (up to 20 records with a risk score
            of at least 70, highest first).
        """
        identities: Set[str] = set()
        hvt_list: list = []
        stealers = 0
        buckets = {"Critical": 0, "High": 0, "Medium": 0, "Low": 0, "Info": 0}
        pw_patterns: Dict[str, int] = {}
        top_threats: list = []

        for r in records:
            ident = _rec_get(r, "email") or _rec_get(r, "username")
            if ident:
                identities.add(ident)
            # Only identifiable HVTs are listed: an empty identity used to be
            # appended as well, yielding a blank bullet and an extra count.
            if HVTAnalyzer.is_hvt(r) and ident:
                hvt_list.append(ident)
            if _is_stealer(r):
                stealers += 1

            rs = float(_rec_get(r, "risk_score") or 0)
            if rs >= 90:
                buckets["Critical"] += 1
            elif rs >= 70:
                buckets["High"] += 1
            elif rs >= 40:
                buckets["Medium"] += 1
            elif rs >= 10:
                buckets["Low"] += 1
            else:
                buckets["Info"] += 1

            pw = _rec_get(r, "password")
            if pw:
                pw = str(pw)  # re.search()/isdigit() require a string
                if re.search(r"[A-Z]", pw) and re.search(r"\d", pw) and re.search(r"[!@#$%^&*]", pw):
                    pat = "Complex"
                elif _CORP_PW_RE.match(pw):
                    pat = "Corporate (Word+Year+Symbol)"
                elif pw.isdigit():
                    pat = "Numeric only"
                elif pw.isalpha():
                    pat = "Alpha only"
                else:
                    pat = "Other"
                pw_patterns[pat] = pw_patterns.get(pat, 0) + 1

            if rs >= 70:
                top_threats.append(r)

        top_threats.sort(key=lambda r: float(_rec_get(r, "risk_score") or 0), reverse=True)
        return {
            "total_identities": len(identities),
            "total_records": len(records),
            # dict.fromkeys() de-duplicates while keeping first-seen order.
            "hvt_list": list(dict.fromkeys(hvt_list))[:30],
            "hvt_count": len(set(hvt_list)),
            "stealer_count": stealers,
            "buckets": buckets,
            "pw_patterns": sorted(pw_patterns.items(), key=lambda x: -x[1])[:8],
            "top_threats": top_threats[:20],
        }
int(value / max(max_val,1) * 100)) + colour = "#ff0040" if pct >= 90 else "#ff6600" if pct >= 70 else "#ffcc00" if pct >= 40 else "#00cc44" + return (f'
' + f'
' + f'{value:.1f}') + + @staticmethod + def to_html(data: dict, path: str) -> None: + records = data.get("records",[]) + target = data.get("target","Unknown") + s = AdvancedReporter._build_summary(records) + rec_dicts = [r.to_dict() if hasattr(r,"to_dict") else r for r in records] + kpi_html = (f'
{s["total_identities"]}
COMPROMISED IDENTITIES
' + f'
{s["stealer_count"]}
STEALER LOGS
' + f'
{s["hvt_count"]}
HIGH-VALUE TARGETS
' + f'
{s["total_records"]}
TOTAL RECORDS
' + f'
{len(data.get("discovered_assets") or [])}
REINJECTED ASSETS
') + total = max(sum(s["buckets"].values()),1) + heatmap_rows = "".join(f'{lvl}{AdvancedReporter._heatmap_bar(cnt,total)}{cnt}' for lvl,cnt in s["buckets"].items()) + pw_rows = "".join(f'{p}{c}{AdvancedReporter._heatmap_bar(c,max((c2 for _,c2 in s["pw_patterns"]),default=1))}' for p,c in s["pw_patterns"]) + + _sp = AdvancedReporter.sanitize_payload # shorthand + + threat_rows = "".join( + f'' + f'{_sp(_rec_get(r,"email") or _rec_get(r,"username"))}' + f'{_sp(_rec_get(r,"password") or "")}' + f'{_sp(_rec_get(r,"password_hash") or "")[:30]}' + f'{_sp(_rec_get(r,"ip_address") or "")}' + f'{_sp(_rec_get(r,"phone") or "")}' + f'{_sp(_rec_get(r,"domain") or "")}' + f'{_sp(_rec_get(r,"source"))}' + f'{_sp(_rec_get(r,"breach_date"))}' + f'{AdvancedReporter._heatmap_bar(float(_rec_get(r,"risk_score") or 0))}' + f'{"⚑ HVT" if HVTAnalyzer.is_hvt(r) else ""}' + for r in s["top_threats"] + ) + hvt_items = "".join(f'
  • ⚠ {_sp(v)}
  • ' for v in s["hvt_list"]) or "
  • None detected
  • " + cred_rows = "" + for r in rec_dicts[:500]: + rs = float(r.get("risk_score",0) if isinstance(r,dict) else getattr(r,"risk_score",0)) + cls = "crit" if rs>=90 else "high" if rs>=70 else "med" if rs>=40 else "" + hvt_badge = "⚑" if HVTAnalyzer.is_hvt(r) else "" + cred_rows += ( + f"" + f"{_sp(_rec_get(r,'email'))}{hvt_badge}" + f"{_sp(_rec_get(r,'username') or '')}" + f"{_sp(_rec_get(r,'password') or '')}" + f"{_sp((_rec_get(r,'password_hash') or '')[:30])}" + f"{_sp(_rec_get(r,'ip_address') or '')}" + f"{_sp(_rec_get(r,'phone') or '')}" + f"{_sp(_rec_get(r,'domain') or '')}" + f"{_sp(_rec_get(r,'source'))}" + f"{_sp(_rec_get(r,'breach_date'))}" + f"{AdvancedReporter._heatmap_bar(rs)}" + ) + # ── Discovered documents section ────────────────────────────── + doc_rows = "" + for r in records: + src = _rec_get(r, "source") + if src != "DorkingEngine": + continue + rd = r if isinstance(r, dict) else r.raw_data if hasattr(r, "raw_data") else {} + meta = (r.metadata if hasattr(r, "metadata") else {}) or {} + url = rd.get("url", "") if isinstance(rd, dict) else "" + ext = url.lower().rsplit(".", 1)[-1].split("?")[0] if "." in url else "" + paths = "; ".join(meta.get("local_paths", [])) + emails = "; ".join(meta.get("emails", [])) + doc_rows += ( + f"" + f"{_sp(url[:80])}" + f"{_sp(ext)}" + f"{_sp(meta.get('author',''))}" + f"{_sp(meta.get('creator',''))}" + f"{_sp(paths)}" + f"{_sp(emails)}" + ) + doc_section = (f'

    🔍 Discovered Public Documents & Metadata

    ' + f'' + f'{doc_rows if doc_rows else ""}
    URLTypeAuthorCreatorLocal PathsEmails
    No documents found
    ' + ) + + # ── Dork hits section ───────────────────────────────────────── + dork_results = data.get("dork_results", []) or [] + dork_hit_rows = "" + for h in dork_results: + url = h.get("url", "") + title = h.get("title", "") + snippet = h.get("snippet", "") + dork_q = h.get("dork", "") + engine = h.get("engine", "") + link = f'{_sp(url[:90])}' if url else _sp(title[:90]) + dork_hit_rows += ( + f"" + f"{link}" + f"{_sp(snippet[:120])}" + f"{_sp(dork_q[:80])}" + f"{_sp(engine)}" + f"" + ) + dork_section = ( + f'

    🔎 Dork Results ({len(dork_results)} hits)

    ' + f'' + f'{dork_hit_rows if dork_hit_rows else ""}
    URL / TitleSnippetDork QueryEngine
    No dork hits
    ' + ) + + # ── Scrape section ──────────────────────────────────────────── + scrape_results = data.get("scrape_results", {}) or {} + + # Pastes + paste_rows = "" + for p in scrape_results.get("pastes", []): + site = _sp(p.get("site", "")) + pid = p.get("id", "") + title = _sp(p.get("title", pid)[:80]) + query = _sp(p.get("query", "")[:60]) + # Build a best-effort direct link + paste_links = { + "Pastebin": f"https://pastebin.com/{pid}", + "Rentry": f"https://rentry.co/{pid}", + "Hastebin": f"https://hastebin.com/{pid}", + "DPaste": f"https://dpaste.org/{pid}", + "Ghostbin": f"https://ghostbin.com/paste/{pid}", + "JustPaste":f"https://justpaste.it/{pid}", + "ControlC": f"https://controlc.com/{pid}", + "Paste2": f"https://paste2.org/raw/{pid}", + "PastebinPro": f"https://pastebin.com/{pid}", + } + link_url = paste_links.get(p.get("site", ""), "") + link_html = (f'{title or pid}' + if link_url else (title or _sp(pid))) + patterns = p.get("patterns", {}) + pat_str = _sp(", ".join(f"{k}({len(v)})" for k, v in patterns.items()) if patterns else "") + paste_rows += f"{site}{link_html}{pat_str}{query}" + + # Credentials extracted from pastes + cred_scrape_rows = "" + for c in scrape_results.get("credentials", []): + raw = _sp(c.get("raw", "")[:120]) + src = _sp(c.get("source", "")) + pid = c.get("paste_id", "") + cred_scrape_rows += f"{raw}{src}{_sp(pid)}" + + # Telegram hits + tg_rows = "" + for t in scrape_results.get("telegram", []): + ch = _sp(t.get("channel", "")) + text = _sp(t.get("text", "")[:200]) + pats = _sp(", ".join(f"{k}({len(v)})" for k, v in (t.get("patterns") or {}).items())) + link = f't.me/s/{ch}' + tg_rows += f"{link}{text}{pats}" + + # Misconfig dork hits + mc_rows = "" + for m in scrape_results.get("dork_misconfigs", []): + url_m = m.get("url", "") + title_m = _sp(m.get("title", "")[:80]) + dork_m = _sp(m.get("dork", "")[:80]) + link_m = (f'{_sp(url_m[:80])}' + if url_m else title_m) + mc_rows += f"{link_m}{title_m}{dork_m}" + + scrape_section = ( 
+ f'

    📋 Scrape Results

    ' + f'

    Pastes ({len(scrape_results.get("pastes",[]))})

    ' + f'' + f'{paste_rows or ""}
    SitePaste / LinkPatterns FoundQuery
    None
    ' + f'

    Extracted Credentials ({len(scrape_results.get("credentials",[]))})

    ' + f'' + f'{cred_scrape_rows or ""}
    Raw CredentialSourcePaste ID
    None
    ' + f'

    Telegram CTI ({len(scrape_results.get("telegram",[]))})

    ' + f'' + f'{tg_rows or ""}
    ChannelMessagePatterns
    None
    ' + f'

    Misconfigurations ({len(scrape_results.get("dork_misconfigs",[]))})

    ' + f'' + f'{mc_rows or ""}
    URLTitleDork
    None
    ' + f'
    ' + ) + + css = ("*{margin:0;padding:0;box-sizing:border-box}body{font-family:'Courier New',monospace;background:#0a0a0a;color:#e0e0e0;padding:20px}.header{text-align:center;padding:30px;border:1px solid #333;margin-bottom:20px;background:#111}.header h1{color:#00ff41;font-size:28px;letter-spacing:4px}.header p{color:#888;margin-top:6px}.stats{display:grid;grid-template-columns:repeat(auto-fit,minmax(180px,1fr));gap:12px;margin:15px 0}.stat{background:#111;border:1px solid #333;padding:18px;text-align:center}.stat .num{font-size:32px;font-weight:bold;color:#00ff41}.stat .label{color:#888;font-size:11px;margin-top:4px}.stat.crit .num{color:#ff0040}.stat.hvt .num{color:#ff6600}.section{margin:20px 0}.section h2{color:#00ff41;border-bottom:1px solid #333;padding-bottom:6px;margin-bottom:12px}.section h3{color:#aaa;margin:12px 0 6px}table{width:100%;border-collapse:collapse}th,td{padding:8px;border:1px solid #222;font-size:12px;word-break:break-all}th{background:#1a1a1a;color:#00ff41;text-transform:uppercase;font-size:11px}td{background:#0d0d0d}tr.crit td{background:#1a0005}tr.high td{background:#1a0a00}tr.med td{background:#1a1500}.pw{color:#ff0040;font-weight:bold}.hvt-box{background:#1a0a00;border:1px solid #ff6600;padding:12px;margin:10px 0}.hvt-box ul{padding-left:20px;color:#ff6600}.pivot-node{margin:4px 0;padding:6px 10px;border-left:2px solid #333;background:#0d0d0d}.pivot-seed{border-left-color:#00ff41}.pivot-pivot{border-left-color:#00ccff}.pivot-crack{border-left-color:#cc00ff}.pivot-asset{color:#00ccff;font-weight:bold}.pivot-stats{color:#888;font-size:11px;margin-top:3px}.pivot-children{margin-left:20px;border-left:1px solid #222;padding-left:8px}") + + # ── Pivot Tree HTML section ─────────────────────────────────── + pivot_log = data.get("pivot_log", []) or [] + if pivot_log: + log_by_key_html = {e["asset"].lower(): e for e in pivot_log} + def _build_pivot_html(entries: list) -> str: + html = "" + for e in entries: + found_in = e.get("found_in", 
e.get("source", "?")) + src_color = {"seed": "#00ff41", "breach": "#ff0040", "dork": "#ff6600", + "scrape": "#cc00ff", "hash_crack": "#cc00ff", + "pivot": "#00ccff"}.get(found_in, "#888") + stats_parts = [] + if e["records"]: stats_parts.append(f'{e["records"]} breach') + if e["dorks"]: stats_parts.append(f'{e["dorks"]} dork') + if e["scrape"]: stats_parts.append(f'{e["scrape"]} scrape') + if e.get("cracked"): stats_parts.append(f'cracked→{_sp(", ".join(e["cracked"][:2]))}') + # Children with phase+ref + children = e.get("children", []) + child_html_inner = "" + if children: + _phase_colors_html = {"breach": "#ff0040", "dork": "#ff6600", + "scrape": "#cc00ff", "hash_crack": "#cc00ff"} + child_html_inner = '
    ↳ reinjected: ' + parts_ch = [] + for ch in children[:6]: + ph = ch.get("found_in", "?") + col = _phase_colors_html.get(ph, "#888") + parts_ch.append( + f'[{_sp(ph)}] {_sp(ch.get("asset",""))}' + ) + child_html_inner += ", ".join(parts_ch) + if len(children) > 6: + child_html_inner += f" +{len(children)-6} more" + child_html_inner += "
    " + # Recurse into processed children + child_log_entries = [log_by_key_html[ch["asset"].lower()] + for ch in children + if ch.get("asset","").lower() in log_by_key_html] + child_tree = _build_pivot_html(child_log_entries) if child_log_entries else "" + html += ( + f'
    ' + f'[{found_in.upper()}] ' + f'{_sp(e["asset"])} ' + f'({_sp(e["qtype"])})' + + (f' ← {_sp(e["parent"])}' if e.get("parent") else "") + + (f'
    {"  |  ".join(stats_parts)}
    ' if stats_parts else "") + + child_html_inner + + (f'
    {child_tree}
    ' if child_tree else "") + + '
    ' + ) + return html + + roots_html = [e for e in pivot_log if e["depth"] == 0] + pivot_tree_html = _build_pivot_html(roots_html) + pivot_section = ( + f'

    🔄 Pivot Tree ({len(pivot_log)} nodes)

    ' + f'{pivot_tree_html}
    ' + ) + else: + pivot_section = "" + + # ── Discovered Assets section ───────────────────────────────── + discovered_assets = data.get("discovered_assets", []) or [] + _phase_badge_colors = { + "breach": "#ff0040", + "dork": "#ff6600", + "scrape": "#cc00ff", + "hash_crack": "#cc00ff", + "seed": "#00ff41", + } + da_rows = "" + for da in discovered_assets: + phase = da.get("phase", "?") + ref = da.get("ref", "") + ref_html = (f'{_sp(ref[:80])}' + if ref.startswith("http") else _sp(ref[:100])) + badge_col = _phase_badge_colors.get(phase, "#888") + da_rows += ( + f"" + f"{_sp(da.get('asset',''))}" + f"{_sp(da.get('qtype',''))}" + f"{_sp(phase.upper())}" + f"{ref_html}" + f"{_sp(da.get('parent',''))}" + f"{da.get('depth',0)}" + f"" + ) + discovered_section = ( + f'

    🔎 Discovered Assets ({len(discovered_assets)} new identifiers reinjected)

    ' + f'' + f'{da_rows if da_rows else ""}
    AssetTypePhaseReference (Source / URL / Paste)Discovered FromDepth
    No pivot assets discovered
    ' + ) + + page = (f'NOX Framework — {_sp(target)}' + f'

    [ NOX Framework ]

    Target: {_sp(target)}  |  {datetime.now().strftime("%Y-%m-%d %H:%M:%S UTC")}  |  v{VERSION}

    ' + f'

    📋 Executive Summary

    {kpi_html}
    ' + f'

    Risk Heatmap

    {heatmap_rows}
    LevelDistribution#
    ' + f'

    Password Patterns

    {pw_rows}
    PatternCountPrevalence
    ' + f'

    ⚠ High-Value Targets ({s["hvt_count"]})

      {hvt_items}
    ' + f'

    🚨 Top Threats

    {threat_rows}
    IdentityPasswordHashIPPhoneDomainSourceDateRiskFlag
    ' + f'{pivot_section}' + f'{discovered_section}' + f'{doc_section}' + f'{dork_section}' + f'{scrape_section}' + f'

    Credential Records (top 500)

    {cred_rows}
    EmailUsernamePasswordHashIPPhoneDomainSourceDateRisk
    @staticmethod
    def to_markdown(data: dict, path: str) -> None:
        """Write the scan result ``data`` as a Markdown report to ``path``.

        Sections: executive summary, risk distribution, password patterns,
        high-value targets, top threats, the first 200 records, dork hits,
        scrape results (pastes, credentials, Telegram, misconfigurations),
        pivot tree and discovered assets.

        Every breach-sourced value placed in a table is flattened to one
        line and has ``|`` escaped, so a single password or paste excerpt
        cannot break the table layout.

        Args:
            data: Result dict as produced by a full scan (``records``,
                ``target``, ``dork_results``, ``scrape_results``,
                ``pivot_log``, ``discovered_assets``).
            path: Destination file, written as UTF-8.
        """
        records = data.get("records", [])
        target = data.get("target", "Unknown")
        s = AdvancedReporter._build_summary(records)
        ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        _r = AdvancedReporter._raw  # strip control chars, no HTML escaping for markdown

        def _flat(value: Any) -> str:
            # _raw() keeps \n and \r; a table row must stay on one line.
            return " ".join(_r(value).splitlines())

        def _cell(value: Any) -> str:
            # Escape the column separator of pipe tables.
            return _flat(value).replace("|", "\\|")

        def _link(label: Any, url: Any) -> str:
            # A pipe in the link target is percent-encoded, not backslashed.
            return f"[{_cell(label)}]({_flat(url).replace('|', '%7C')})"

        def _row(*cells: Any) -> str:
            return "| " + " | ".join(str(c) for c in cells) + " |"

        lines = [
            "# NOX Framework Report", "",
            # Two trailing spaces = Markdown hard line break.
            f"**Target:** `{_r(target)}`  ",
            f"**Generated:** {ts}  ",
            f"**Version:** {VERSION}",
            "", "---", "## Executive Summary", "",
            "| Metric | Value |", "|--------|-------|",
            f"| Compromised Identities | **{s['total_identities']}** |",
            f"| Total Records | **{s['total_records']}** |",
            f"| Stealer Logs | **{s['stealer_count']}** |",
            f"| High-Value Targets | **{s['hvt_count']}** |",
            "", "### Risk Distribution", "",
            "| Level | Count |", "|-------|-------|",
        ]
        for lvl, cnt in s["buckets"].items():
            if cnt:
                lines.append(f"| {lvl} | {cnt} |")
        lines += ["", "### Password Patterns", "", "| Pattern | Count |", "|---------|-------|"]
        for p, c in s["pw_patterns"]:
            lines.append(f"| {p} | {c} |")
        if s["hvt_list"]:
            lines += ["", "### ⚠ High-Value Targets", ""]
            for v in s["hvt_list"]:
                lines.append(f"- `{_r(v)}`")

        lines += ["", "---", "## Top Threats", "",
                  "| Identity | Password | Hash | IP | Phone | Domain | Source | Date | Risk |",
                  "|----------|----------|------|----|-------|--------|--------|------|------|"]
        for r in s["top_threats"]:
            hvt = " ⚑" if HVTAnalyzer.is_hvt(r) else ""
            lines.append(_row(
                _cell(_rec_get(r, 'email') or _rec_get(r, 'username')) + hvt,
                _cell(_rec_get(r, 'password')),
                _cell((_rec_get(r, 'password_hash') or '')[:20]),
                _cell(_rec_get(r, 'ip_address') or ''),
                _cell(_rec_get(r, 'phone') or ''),
                _cell(_rec_get(r, 'domain') or ''),
                _cell(_rec_get(r, 'source')),
                _cell(_rec_get(r, 'breach_date')),
                _rec_get(r, 'risk_score'),
            ))

        lines += ["", "---", "## Records (top 200)", "",
                  "| Email | Username | Password | Hash | IP | Phone | Domain | Source | Date | Risk |",
                  "|-------|----------|----------|------|----|-------|--------|--------|------|------|"]
        for r in records[:200]:
            lines.append(_row(
                _cell(_rec_get(r, 'email')),
                _cell(_rec_get(r, 'username') or ''),
                _cell(_rec_get(r, 'password') or ''),
                _cell((_rec_get(r, 'password_hash') or '')[:20]),
                _cell(_rec_get(r, 'ip_address') or ''),
                _cell(_rec_get(r, 'phone') or ''),
                _cell(_rec_get(r, 'domain') or ''),
                _cell(_rec_get(r, 'source')),
                _cell(_rec_get(r, 'breach_date')),
                _rec_get(r, 'risk_score'),
            ))

        # ── Dork results ──────────────────────────────────────────────
        dork_results = data.get("dork_results", []) or []
        lines += ["", "---", f"## Dork Results ({len(dork_results)} hits)", ""]
        if dork_results:
            lines += ["| URL / Title | Snippet | Dork Query | Engine |",
                      "|-------------|---------|------------|--------|"]
            for h in dork_results:
                url = _r(h.get("url", h.get("title", "")))
                link = _link(url[:80], url) if url.startswith("http") else _cell(url[:80])
                lines.append(_row(
                    link,
                    _cell(h.get("snippet", "")[:100]),
                    _cell(h.get("dork", "")[:80]),
                    _cell(h.get("engine", "")),
                ))
        else:
            lines.append("_No dork hits._")

        # ── Scrape results ────────────────────────────────────────────
        scrape_results = data.get("scrape_results", {}) or {}

        pastes = scrape_results.get("pastes", [])
        lines += ["", "---", f"## Scrape — Pastes ({len(pastes)})", ""]
        if pastes:
            lines += ["| Site | Paste / Link | Patterns |", "|------|-------------|----------|"]
            # Best-effort direct links per paste site.
            paste_links = {
                "Pastebin": "https://pastebin.com/{}",
                "Rentry": "https://rentry.co/{}",
                "Hastebin": "https://hastebin.com/{}",
                "DPaste": "https://dpaste.org/{}",
                "Ghostbin": "https://ghostbin.com/paste/{}",
                "JustPaste": "https://justpaste.it/{}",
                "ControlC": "https://controlc.com/{}",
                "Paste2": "https://paste2.org/raw/{}",
                "PastebinPro": "https://pastebin.com/{}",
            }
            for p in pastes:
                site = p.get("site", "")
                pid = p.get("id", "")
                title = _r(p.get("title", pid)[:60])
                tmpl = paste_links.get(site, "")
                if tmpl and pid:
                    link = _link(title or pid, tmpl.format(pid))
                else:
                    link = _cell(title or pid)
                pats = _cell(", ".join(f"{k}({len(v)})" for k, v in (p.get("patterns") or {}).items()))
                lines.append(_row(_cell(site), link, pats))
        else:
            lines.append("_No pastes found._")

        creds_scraped = scrape_results.get("credentials", [])
        lines += ["", "---", f"## Scrape — Extracted Credentials ({len(creds_scraped)})", ""]
        if creds_scraped:
            lines += ["| Raw Credential | Source | Paste ID |", "|----------------|--------|----------|"]
            for c in creds_scraped:
                lines.append(_row(
                    f"`{_cell(c.get('raw', '')[:100])}`",
                    _cell(c.get('source', '')),
                    _cell(c.get('paste_id', '')),
                ))
        else:
            lines.append("_No credentials extracted._")

        tg_hits = scrape_results.get("telegram", [])
        lines += ["", "---", f"## Scrape — Telegram CTI ({len(tg_hits)})", ""]
        if tg_hits:
            lines += ["| Channel | Message (excerpt) | Patterns |", "|---------|-------------------|----------|"]
            for t in tg_hits:
                ch = _r(t.get("channel", ""))
                text = _cell(t.get("text", "")[:150])
                pats = _cell(", ".join(f"{k}({len(v)})" for k, v in (t.get("patterns") or {}).items()))
                lines.append(_row(_link(f"t.me/s/{ch}", f"https://t.me/s/{ch}"), text, pats))
        else:
            lines.append("_No Telegram hits._")

        mc_hits = scrape_results.get("dork_misconfigs", [])
        lines += ["", "---", f"## Scrape — Misconfigurations ({len(mc_hits)})", ""]
        if mc_hits:
            lines += ["| URL | Title | Dork |", "|-----|-------|------|"]
            for m in mc_hits:
                url_m = _r(m.get("url", ""))
                link_m = _link(url_m[:60], url_m) if url_m.startswith("http") else _cell(url_m[:60])
                lines.append(_row(
                    link_m,
                    _cell(m.get("title", "")[:60]),
                    _cell(m.get("dork", "")[:60]),
                ))
        else:
            lines.append("_No misconfigurations found._")

        # ── Pivot Tree ────────────────────────────────────────────────
        pivot_log = data.get("pivot_log", []) or []
        if pivot_log:
            lines += ["", "---", f"## Pivot Tree ({len(pivot_log)} nodes)", "",
                      "| Depth | Asset | Type | Found In | Parent | Breach | Dorks | Scrape | Children | Cracked |",
                      "|-------|-------|------|----------|--------|--------|-------|--------|----------|---------|"]
            # J4: order rows by (depth, parent, asset) so siblings stay together.
            ordered = sorted(
                pivot_log,
                key=lambda x: (x.get("depth", 0), x.get("parent") or "", x.get("asset", "")),
            )
            for e in ordered:
                cracked_str = _cell(", ".join(e.get("cracked", [])[:3]))
                children = e.get("children", [])
                children_str = _cell(", ".join(
                    f"{ch.get('asset','')}[{ch.get('found_in','?')}]"
                    for ch in children[:4]
                ))
                if len(children) > 4:
                    children_str += f" +{len(children)-4}"
                lines.append(_row(
                    e['depth'],
                    f"`{_cell(e['asset'])}`",
                    _cell(e['qtype']),
                    _cell(e.get('found_in', e.get('source', '?'))),
                    _cell(e.get('parent') or ''),
                    e['records'],
                    e['dorks'],
                    e['scrape'],
                    children_str,
                    cracked_str,
                ))

        # ── Discovered Assets ─────────────────────────────────────────
        discovered_assets = data.get("discovered_assets", []) or []
        lines += ["", "---", f"## Discovered Assets ({len(discovered_assets)} new identifiers reinjected)", ""]
        if discovered_assets:
            lines += ["| Asset | Type | Phase | Reference (Source / URL / Paste) | Discovered From | Depth |",
                      "|-------|------|-------|----------------------------------|-----------------|-------|"]
            for da in discovered_assets:
                ref = _r(da.get("ref", ""))
                link = _link(ref[:70], ref) if ref.startswith("http") else _cell(ref[:80])
                lines.append(_row(
                    f"`{_cell(da.get('asset', ''))}`",
                    _cell(da.get('qtype', '')),
                    f"**{_cell(da.get('phase', '?')).upper()}**",
                    link,
                    _cell(da.get('parent', '')),
                    da.get('depth', 0),
                ))
        else:
            lines.append("_No pivot assets discovered._")

        with open(path, "w", encoding="utf-8") as fh:
            fh.write("\n".join(lines) + "\n")
        out("ok", f"Markdown saved: {path}")
class Reporter:
    """Facade that routes each export format to whichever backend is available.

    Every exporter first resolves its destination through ``_resolve_path``,
    so relative paths always end up under ``REPORT_DIR``.
    """

    @staticmethod
    def _resolve_path(path: str, fmt: str) -> str:
        """Return *path* as a string, anchored under REPORT_DIR when relative.

        Args:
            path: Destination supplied by the caller (absolute or relative).
            fmt: Format tag passed by the exporters; currently unused and kept
                only so existing call sites stay valid.

        Returns:
            The absolute path unchanged, or ``REPORT_DIR / path`` for a
            relative one.
        """
        p = Path(path)
        if not p.is_absolute():
            p = REPORT_DIR / p
            # A relative path may carry sub-directories ("case42/report.json");
            # create them so the exporters' open() calls do not fail.
            p.parent.mkdir(parents=True, exist_ok=True)
        return str(p)

    @staticmethod
    def to_json(data: dict, path: str) -> None:
        """Write *data* as JSON, delegating to ``_rep_json`` when available."""
        path = Reporter._resolve_path(path, "json")
        if _HAS_REPORTING:
            _rep_json(data, path)
            return

        def ser(o):
            # Fallback serializer for objects json cannot encode natively.
            if isinstance(o, (Severity, Enum)):
                return o.name
            if isinstance(o, Record):
                return o.to_dict()
            return str(o)

        # Explicit UTF-8, matching the CSV and Markdown writers, instead of
        # the platform-dependent default encoding.
        with open(path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, default=ser)
        out("ok", f"JSON report saved: {path}")

    @staticmethod
    def to_csv(records: list, path: str) -> None:
        """Write *records* (dicts or objects with ``to_dict``) as CSV.

        Returns without creating a file when *records* is empty.
        """
        path = Reporter._resolve_path(path, "csv")
        if not records:
            return
        fields = ["email","password","password_hash","username","domain","ip_address","phone","breach_name","breach_date","severity","risk_score","is_hvt","data_types","persistence_score"]
        with open(path, "w", newline="", encoding="utf-8") as f:
            w = csv.DictWriter(f, fieldnames=fields, extrasaction="ignore")
            w.writeheader()
            for r in records:
                row = dict(r) if isinstance(r, dict) else r.to_dict()
                # Flatten values that DictWriter would otherwise repr().
                if isinstance(row.get("severity"), Severity):
                    row["severity"] = row["severity"].name
                if isinstance(row.get("data_types"), list):
                    row["data_types"] = ", ".join(row["data_types"])
                w.writerow(row)
        out("ok", f"CSV saved: {path}")

    @staticmethod
    def to_html(data: dict, path: str) -> None:
        """Write the HTML report via ``_rep_html`` or ``AdvancedReporter``."""
        path = Reporter._resolve_path(path, "html")
        if _HAS_REPORTING:
            _rep_html(data, path)
            return
        AdvancedReporter.to_html(data, path)

    @staticmethod
    def to_markdown(data: dict, path: str) -> None:
        """Write the Markdown report via ``AdvancedReporter.to_markdown``."""
        path = Reporter._resolve_path(path, "md")
        AdvancedReporter.to_markdown(data, path)

    @staticmethod
    def to_pdf(data: dict, path: str, investigator_id: str = "NOX-AUTO") -> None:
        """Write the PDF report using the first backend that is available.

        Order: ``_rep_pdf`` (when ``_HAS_REPORTING``), then
        ``ForensicReporter`` (needs the ``fpdf`` module), then an HTML-to-PDF
        conversion through the module-level ``weasyprint`` callable.
        """
        path = Reporter._resolve_path(path, "pdf")
        if _HAS_REPORTING:
            # D1: _rep_pdf raises RuntimeError (e.g. fpdf2 missing); report it
            # to the operator rather than failing without any message.
            try:
                _rep_pdf(data, path, investigator_id=investigator_id)
            except RuntimeError as e:
                out("err", str(e))
            return
        # ForensicReporter (fpdf2, full forensic layout) — primary path
        try:
            import fpdf as _fpdf_check; del _fpdf_check  # noqa: F401
            ForensicReporter.generate(data, path, investigator_id=investigator_id)
            return
        except ImportError:
            pass
        # Fallback: weasyprint HTML→PDF
        if not weasyprint:
            # D1: explicit error — no silent return with no output file
            out("err", "No PDF library found. Install fpdf2: pip install fpdf2")
            return
        # Render to a temporary HTML file first; it is removed afterwards
        # whether or not the conversion succeeds.
        tmp = tempfile.NamedTemporaryFile(suffix=".html", delete=False)
        tmp_name = tmp.name
        tmp.close()
        try:
            AdvancedReporter.to_html(data, tmp_name)
            weasyprint(tmp_name).write_pdf(path)
            out("ok", f"PDF saved: {path}")
        finally:
            try:
                os.unlink(tmp_name)
            except OSError:
                pass
"Identify and crack a hash"), + ("analyze", "Deep password strength analysis"), + ("graph", "Forensic graph of last scan"), + ("visualize", "ASCII relationship map (Target → Data → Pivots)"), + ("pivot ", "Re-scan using result #n as new seed"), + ("search ", "Filter in-memory records by keyword"), + ("sources", "List loaded plugins with input_type, confidence, key status"), + ("export", "Export last results as HTML (or: export json/csv/md/pdf)"), + ("tor", "Toggle Tor routing"), + ("proxy", "Set proxy"), + ("config", "Configure threads/timeout"), + ("clear", "Clear screen"), + ("help", "Show this help"), + ("quit", "Exit NOX"), + ] + + def _show_menu(self) -> None: + print(f"\n {C.G}NOX Interactive Menu:{C.W}") + for i, (cmd, desc) in enumerate(self._menu_items, 1): + print(f" {C.Y}{i:2}.{C.W} {cmd:<12} - {desc}") + print() + + def run(self) -> None: + self._banner() + self._show_menu() + while True: + try: + raw = input(f"\n{C.G}nox{C.W}> ").strip() + if not raw: + continue + if raw.isdigit(): + num = int(raw) + if 1 <= num <= len(self._menu_items): + cmd_full = self._menu_items[num-1][0] + cmd = cmd_full.split()[0] # strip any suffix + # Commands that need a target/argument prompt + _needs_arg = {"autoscan","scan","dork","scrape","crack","analyze", + "export","config","proxy","pivot","search"} + if cmd in _needs_arg: + if cmd == "crack": + arg = input(f" {C.DM}Hash: {C.W}").strip() + elif cmd == "analyze": + arg = input(f" {C.DM}Password: {C.W}").strip() + elif cmd in ("config", "proxy"): + arg = input(f" {C.DM}Argument: {C.W}").strip() + elif cmd in ("pivot", "search"): + arg = input(f" {C.DM}Argument: {C.W}").strip() + elif cmd == "export": + arg = input(f" {C.DM}Format [html/json/csv/md/pdf]: {C.W}").strip() or "html" + else: + arg = input(f" {C.DM}Target: {C.W}").strip() + else: + arg = "" + self._dispatch(cmd, arg) + else: + out("warn", f"Invalid number: {num}") + else: + parts = raw.split(None, 1) + cmd = parts[0].lower() + arg = parts[1] if len(parts) > 1 
else "" + self._dispatch(cmd, arg) + except KeyboardInterrupt: + print() + out("info", "Interrupted. Type 'quit' to exit.") + except EOFError: + break + except Exception as e: + out("err", f"Error: {e}") + + def _dispatch(self, cmd: str, arg: str) -> None: + if cmd in ("quit","exit","q"): + out("info", "Exiting.") + # B3: flush DB background thread before exit + try: + self.db.close() + except Exception: + pass + sys.exit(0) + elif cmd in ("help","h","?"): + self._help() + elif cmd == "autoscan": + self._fullscan(arg or input(f" {C.DM}Target: {C.W}").strip()) + elif cmd == "scan": + self._scan(arg or input(f" {C.DM}Target: {C.W}").strip()) + elif cmd == "dork": + self._dork(arg or input(f" {C.DM}Target: {C.W}").strip()) + elif cmd == "scrape": + self._scrape(arg or input(f" {C.DM}Target: {C.W}").strip()) + elif cmd == "crack": + self._crack(arg or input(f" {C.DM}Hash: {C.W}").strip()) + elif cmd == "analyze": + self._analyze(arg or input(f" {C.DM}Password: {C.W}").strip()) + elif cmd in ("sources", "list-sources"): + self._sources() + elif cmd == "export": + self._export(arg) + elif cmd == "tor": + self._tor() + elif cmd == "proxy": + self._proxy(arg) + elif cmd == "config": + self._config(arg) + elif cmd == "graph": + self._graph() + elif cmd in ("visualize", "vis"): + self._visualize() + elif cmd == "pivot": + self._pivot(arg) + elif cmd == "search": + self._search(arg or input(f" {C.DM}Query: {C.W}").strip()) + elif cmd == "clear": + os.system("clear" if os.name != "nt" else "cls") + elif cmd == "menu": + self._show_menu() + elif cmd == "banner": + self._banner() + else: + out("warn", f"Unknown command: {cmd}. 
Type 'help' or 'menu' for options.") + + def _banner(self) -> None: + opsec_proxy = self.config.proxy or self.config.use_tor + if opsec_proxy: + opsec_label = f"{C.G}[OPSEC: PROTECTED]{C.X}" + elif getattr(self.config, "allow_leak", False): + opsec_label = f"{C.R}[OPSEC: UNPROTECTED]{C.X}" + else: + opsec_label = f"{C.Y}[OPSEC: GUARDIAN]{C.X}" + print(f""" +{C.G} + ███╗ ██╗ ██████╗ ██╗ ██╗ + ████╗ ██║██╔═══██╗╚██╗██╔╝ + ██╔██╗ ██║██║ ██║ ╚███╔╝ + ██║╚██╗██║██║ ██║ ██╔██╗ + ██║ ╚████║╚██████╔╝██╔╝ ██╗ + ╚═╝ ╚═══╝ ╚═════╝ ╚═╝ ╚═╝ +{C.W} + Cyber Threat Intelligence Framework {C.Y}v{VERSION}{C.W} + {C.DM}120+ JSON plugin sources | Async Core | Pivot Engine | JA3 TLS | HVT Detection{C.W} + {opsec_label} +""") + + def _help(self) -> None: + self._show_menu() + out("info", "\nYou can also type commands directly (e.g., 'scan user@example.com').") + + def _scan(self, arg: str) -> None: + if not arg: out("warn","No target specified."); return + self._last = self.orc.scan(arg) + analysis = CredAnalyzer.analyze(self._last) + HVTAnalyzer.annotate(self._last) + hvt_records = HVTAnalyzer.filter_hvt(self._last) + prev = self._last_full or {} + self._last_full = { + "target": arg, + "records": self._last, + "analysis": analysis, + "hvt_records": hvt_records, + "dork_results": prev.get("dork_results", []), + "scrape_results": prev.get("scrape_results", {}), + "pivot_chain": [arg], + "pivot_log": [], + "discovered_assets": [], + "scan_meta": {"pivot_depth": 0, "nodes_discovered": len(self._last)}, + } + self.session_state["targets_scanned"].append(arg) + + W = 62 + rs = analysis.get("risk_score", 0) + sev = analysis.get("severity", {}) + col = C.R if rs > 60 else C.Y if rs > 30 else C.G + badge = (f"{C.R}[CRITICAL]{C.X}" if rs > 60 or sev.get("critical", 0) > 0 + else f"{C.Y}[HIGH]{C.X}" if rs > 30 or sev.get("high", 0) > 0 + else f"{C.G}[MEDIUM]{C.X}") + + print(f"\n {C.G}{'━'*W}{C.X}") + print(f" {C.G} BREACH SCAN RESULTS{C.X} {badge}") + print(f" {C.DM} Target: {arg}{C.X}") + 
print(f" {C.G}{'━'*W}{C.X}") + + # ── Stats grid ──────────────────────────────────────────────── + total = analysis.get("total_records", 0) + unique = analysis.get("unique_records", total) + emails = analysis.get("unique_emails", 0) + pw_cnt = analysis.get("passwords_found", 0) + stealer = analysis.get("stealer_logs", 0) + hvt_cnt = analysis.get("hvt_count", 0) + reused = len(analysis.get("reused_passwords", {})) + + print(f"\n {'Records':<26} {total} {C.DM}({unique} unique){C.X}") + print(f" {'Unique Emails':<26} {emails}") + print(f" {'Passwords Exposed':<26} {C.R}{pw_cnt}{C.X}") + print(f" {'Stealer Logs':<26} {C.R}{stealer}{C.X}") + print(f" {'High-Value Targets':<26} {C.O}{hvt_cnt}{C.X}") + print(f" {'Password Reuse':<26} {C.Y if reused else C.DM}{reused} password(s) reused{C.X}") + print(f" {'Risk Score':<26} {col}{rs}/100{C.X}") + print(f" {'Severity':<26} " + f"{C.R}{sev.get('critical',0)} CRIT{C.X} " + f"{C.Y}{sev.get('high',0)} HIGH{C.X} " + f"{sev.get('medium',0)} MED " + f"{C.DM}{sev.get('low',0)} LOW{C.X}") + + # ── Top exposed credentials ─────────────────────────────────── + creds = [(r, _rec_get(r, "password")) for r in self._last if _rec_get(r, "password")] + if creds: + print(f"\n {C.Y}┌─ TOP EXPOSED CREDENTIALS ({len(creds)} total) {'─'*(W-38)}┐{C.X}") + for r, pw in creds[:8]: + em = (_rec_get(r, "email") or _rec_get(r, "username") or "—")[:38] + src = _rec_get(r, "source") or "" + breach = _rec_get(r, "breach_name") or "" + rs_r = _rec_get(r, "risk_score") or 0 + rc = C.R if float(rs_r) >= 70 else C.Y if float(rs_r) >= 40 else C.W + masked = pw[:2] + "●" * min(len(pw) - 2, 8) if len(pw) > 2 else "●●●●" + ref_tag = f" {C.DM}[{breach or src}]{C.X}" if (breach or src) else "" + print(f" {C.Y}│{C.X} {C.CY}{em:<38}{C.X} {rc}{masked:<12}{C.X} {rc}risk:{rs_r}{C.X}{ref_tag}") + extra = self._record_assets(r) + if extra: print(f" {C.Y}│{C.X} {extra}") + if len(creds) > 8: + print(f" {C.Y}│{C.X} {C.DM}… and {len(creds)-8} more — use 'export' for the 
full list{C.X}") + print(f" {C.Y}└{'─'*(W-2)}┘{C.X}") + + # ── Non-credential assets (IPs, phones, domains, usernames, hashes) ── + other = [r for r in self._last if not _rec_get(r, "password")] + if other: + print(f"\n {C.B}┌─ DISCOVERED ASSETS ({len(other)}) {'─'*(W-22)}┐{C.X}") + for r in other[:10]: + ident = _rec_get(r, "email") or _rec_get(r, "username") or _rec_get(r, "ip_address") or _rec_get(r, "domain") or "—" + src = _rec_get(r, "source") or "" + breach = _rec_get(r, "breach_name") or "" + rs_r = _rec_get(r, "risk_score") or 0 + ref = breach or src + print(f" {C.B}│{C.X} {C.CY}{ident:<38}{C.X} {C.DM}risk:{rs_r} [{ref[:22]}]{C.X}") + extra = self._record_assets(r) + if extra: print(f" {C.B}│{C.X} {extra}") + if len(other) > 10: + print(f" {C.B}│{C.X} {C.DM}… and {len(other)-10} more — use 'export' for the full list{C.X}") + print(f" {C.B}└{'─'*(W-2)}┘{C.X}") + + # ── HVT alert ───────────────────────────────────────────────── + hvt = [r for r in self._last if HVTAnalyzer.is_hvt(r)] + if hvt: + print(f"\n {C.O}⚑ HIGH-VALUE TARGETS ({len(hvt)}){C.X}") + for r in hvt[:5]: + ident = _rec_get(r, "email") or _rec_get(r, "username") or "—" + rs_r = _rec_get(r, "risk_score") or "" + print(f" {C.O}→{C.X} {ident:<45} {C.Y}risk: {rs_r}{C.X}") + if len(hvt) > 5: + print(f" {C.DM} … and {len(hvt)-5} more{C.X}") + + # ── Password reuse ──────────────────────────────────────────── + reused_map = analysis.get("reused_passwords", {}) + if reused_map: + print(f"\n {C.R}⚠ PASSWORD REUSE DETECTED{C.X}") + for pw, cnt in list(reused_map.items())[:4]: + masked = pw[:2] + "●" * (len(pw) - 2) if len(pw) > 2 else "●●●●" + print(f" {C.R}→{C.X} {masked} reused {cnt}× across breaches") + + print(f"\n {C.G}{'━'*W}{C.X}") + print(f" {C.DM}Use 'graph' for full report | 'export pdf/html/json' for forensic output{C.X}\n") + + def _fullscan(self, arg: str) -> None: + if not arg: out("warn","No target specified."); return + out("info", f"[autoscan] Starting full scan + pivot + dork + 
scrape for: {arg}") + # Seed the pivot chain immediately so it's visible even if the scan fails + if arg not in self.session_state["pivot_chain"]: + self.session_state["pivot_chain"].append(arg) + result = {"target": arg, "records": [], "dork_results": [], "scrape_results": {}, + "hvt_records": [], "pivot_chain": [arg], "pivot_log": [], "discovered_assets": [], "scan_meta": {}} + try: + try: + loop = asyncio.get_running_loop() + except RuntimeError: + loop = None + if loop and loop.is_running(): + import concurrent.futures + with concurrent.futures.ThreadPoolExecutor(max_workers=1) as ex: + result = ex.submit(asyncio.run, self.orc.fullscan(arg, pivot=not self.config.no_pivot)).result(timeout=600) + else: + result = asyncio.run(self.orc.fullscan(arg, pivot=not self.config.no_pivot)) + finally: + self._last = result.get("records", []) + self._last_full = result + self.session_state["targets_scanned"].append(arg) + for node in result.get("pivot_chain", [arg]): + if node not in self.session_state["pivot_chain"]: + self.session_state["pivot_chain"].append(node) + + scan_meta = result.get("scan_meta", {}) or {} + elapsed = scan_meta.get("elapsed_seconds") + depth = scan_meta.get("pivot_depth", 0) + nodes = scan_meta.get("nodes_discovered", 0) + analysis = result.get("analysis") or CredAnalyzer.analyze(self._last) + rs_total = analysis.get("risk_score", 0) + sev = analysis.get("severity", {}) + col = C.R if rs_total > 60 else C.Y if rs_total > 30 else C.G + badge = (f"{C.R}[CRITICAL]{C.X}" if rs_total > 60 or sev.get("critical", 0) > 0 + else f"{C.Y}[HIGH]{C.X}" if rs_total > 30 or sev.get("high", 0) > 0 + else f"{C.G}[MEDIUM]{C.X}") + W = 62 + + print(f"\n {C.G}{'━'*W}{C.X}") + print(f" {C.G} AUTOSCAN COMPLETE{C.X} {badge} {C.DM}target: {arg}{C.X}") + print(f" {C.G}{'━'*W}{C.X}") + + # ── Summary stats ───────────────────────────────────────────── + dork_count = len(result.get("dork_results", []) or []) + scrape_r = result.get("scrape_results", {}) or {} + paste_count = 
len(scrape_r.get("pastes", [])) + cred_sc_cnt = len(scrape_r.get("credentials", [])) + tg_count = len(scrape_r.get("telegram", [])) + mc_count = len(scrape_r.get("dork_misconfigs", [])) + + print(f"\n {'Records':<26} {analysis.get('total_records', len(self._last or []))}" + f" {C.DM}({analysis.get('unique_records', 0)} unique){C.X}") + print(f" {'Passwords Exposed':<26} {C.R}{analysis.get('passwords_found', 0)}{C.X}") + print(f" {'Stealer Logs':<26} {C.R}{analysis.get('stealer_logs', 0)}{C.X}") + print(f" {'High-Value Targets':<26} {C.O}{analysis.get('hvt_count', 0)}{C.X}") + print(f" {'Dork Hits':<26} {C.O}{dork_count}{C.X}") + print(f" {'Pastes Found':<26} {C.P}{paste_count}{C.X}") + if cred_sc_cnt: print(f" {'Scraped Credentials':<26} {C.R}{cred_sc_cnt}{C.X}") + if tg_count: print(f" {'Telegram Hits':<26} {C.CY}{tg_count}{C.X}") + if mc_count: print(f" {'Misconfigurations':<26} {C.O}{mc_count}{C.X}") + print(f" {'Nodes Discovered':<26} {nodes}") + print(f" {'Pivot Depth':<26} {depth}") + if elapsed is not None: print(f" {'Elapsed':<26} {elapsed:.1f}s") + da_cnt = len(result.get("discovered_assets", []) or []) + if da_cnt: print(f" {'Reinjected Assets':<26} {C.CY}{da_cnt}{C.X}") + print(f" {'Risk Score':<26} {col}{rs_total}/100{C.X}") + print(f" {'Severity':<26} " + f"{C.R}{sev.get('critical',0)} CRIT{C.X} " + f"{C.Y}{sev.get('high',0)} HIGH{C.X} " + f"{sev.get('medium',0)} MED") + + # ── High-Value Targets ──────────────────────────────────────── + hvt = result.get("hvt_records", []) + if hvt: + print(f"\n {C.O}{'─'*W}{C.X}") + print(f" {C.O}⚑ HIGH-VALUE TARGETS ({len(hvt)}){C.X}") + print(f" {C.O}{'─'*W}{C.X}") + for r in hvt[:10]: + ident = _rec_get(r, "email") or _rec_get(r, "username") or "—" + rs = _rec_get(r, "risk_score") + print(f" {C.R}→{C.X} {C.W}{ident:<45}{C.X} {C.Y}risk: {rs}{C.X}") + extra = self._record_assets(r) + if extra: print(f" {extra}") + if len(hvt) > 10: + print(f" {C.DM} … and {len(hvt)-10} more — use 'graph' or 'export' for the full 
list{C.X}") + + # ── Discovered Assets (flat provenance table) ───────────────── + # ── Pivot Tree ──────────────────────────────────────────────── + pivot_log = result.get("pivot_log", []) + discovered_assets = result.get("discovered_assets", []) + if pivot_log: + print(f"\n {C.CY}{'─'*W}{C.X}") + print(f" {C.CY} PIVOT TREE ({len(pivot_log)} nodes){C.X}") + print(f" {C.CY}{'─'*W}{C.X}") + self._print_pivot_tree(pivot_log, result) + else: + # No avalanche engine — flat display + recs = self._last or [] + cred_recs = [r for r in recs if _rec_get(r, "password")] + other_recs = [r for r in recs if not _rec_get(r, "password")] + if cred_recs: + print(f"\n {C.R}{'─'*W}{C.X}") + print(f" {C.R}[!] EXPOSED CREDENTIALS ({len(cred_recs)}){C.X}") + print(f" {C.R}{'─'*W}{C.X}") + for r in cred_recs[:12]: + em = (_rec_get(r, "email") or _rec_get(r, "username") or "—")[:40] + pw = _rec_get(r, "password") or "" + src = _rec_get(r, "source") or "" + rs_r = _rec_get(r, "risk_score") or 0 + masked = pw[:2] + "●" * min(len(pw) - 2, 8) if len(pw) > 2 else "●●●●" + rc = C.R if float(rs_r) >= 70 else C.Y if float(rs_r) >= 40 else C.W + print(f" {C.R}→{C.X} {C.CY}{em:<40}{C.X} {rc}{masked}{C.X} {C.DM}[{src[:18]}] risk:{rs_r}{C.X}") + extra = self._record_assets(r) + if extra: print(f" {extra}") + if len(cred_recs) > 12: + print(f" {C.DM} … and {len(cred_recs)-12} more — use 'export'{C.X}") + if other_recs: + print(f"\n {C.B}{'─'*W}{C.X}") + print(f" {C.B}[~] DISCOVERED ASSETS ({len(other_recs)}){C.X}") + print(f" {C.B}{'─'*W}{C.X}") + for r in other_recs[:12]: + ident = _rec_get(r, "email") or _rec_get(r, "username") or "—" + src = _rec_get(r, "source") or "" + rs_r = _rec_get(r, "risk_score") or 0 + print(f" {C.B}→{C.X} {C.CY}{ident:<40}{C.X} {C.DM}[{src[:18]}] risk:{rs_r}{C.X}") + extra = self._record_assets(r) + if extra: print(f" {extra}") + if len(other_recs) > 12: + print(f" {C.DM} … and {len(other_recs)-12} more — use 'export'{C.X}") + + # ── Flat discovered assets table 
────────────────────────────── + if discovered_assets: + _phase_col = {"breach": C.R, "dork": C.O, "scrape": C.P, + "hash_crack": C.P, "seed": C.G} + print(f"\n {C.B}{'─'*W}{C.X}") + print(f" {C.B} DISCOVERED ASSETS ({len(discovered_assets)} new identifiers){C.X}") + print(f" {C.B}{'─'*W}{C.X}") + print(f" {C.DM} {'ASSET':<38} {'TYPE':<10} {'PHASE':<10} {'FOUND IN / REF'}{C.X}") + print(f" {C.DM} {'─'*38} {'─'*10} {'─'*10} {'─'*30}{C.X}") + for da in discovered_assets[:50]: + pc = _phase_col.get(da["phase"], C.DM) + ref = da.get("ref", "")[:55] + print(f" {C.CY} {da['asset']:<38}{C.X} {C.DM}{da['qtype']:<10}{C.X} " + f"{pc}{da['phase']:<10}{C.X} {C.DM}{ref}{C.X}") + if len(discovered_assets) > 50: + print(f" {C.DM} … and {len(discovered_assets)-50} more — use 'export' for full list{C.X}") + + print(f"\n {C.G}{'━'*W}{C.X}") + print(f" {C.DM}Use 'graph' for full intelligence report | 'export pdf/html/json' for forensic output{C.X}\n") + + def _print_pivot_tree(self, pivot_log: list, result: dict) -> None: + """Print the full pivot tree with per-node phase findings and reinjection details.""" + log_by_key = {e["asset"].lower(): e for e in pivot_log} + + # Index breach records by the scanned asset (matched by email/username/phone/domain) + all_recs = result.get("records", []) or [] + recs_by_asset: Dict[str, list] = {} + for r in all_recs: + # A record belongs to the asset whose value matches the record's identity fields + for fname in ("email", "username", "phone", "domain", "ip_address"): + v = _rec_get(r, fname) + if v: + recs_by_asset.setdefault(v.lower(), []).append(r) + break # one record → one bucket + + # Index dork/scrape hits by pivot_asset tag + dork_by_asset: Dict[str, list] = {} + for h in result.get("dork_results", []) or []: + dork_by_asset.setdefault(h.get("pivot_asset", "").lower(), []).append(h) + + scrape_by_asset: Dict[str, list] = {} + for cat in ("credentials", "pastes", "telegram", "dork_misconfigs"): + for item in (result.get("scrape_results", 
{}) or {}).get(cat, []): + if isinstance(item, dict): + scrape_by_asset.setdefault( + item.get("pivot_asset", "").lower(), [] + ).append((cat, item)) + + phase_colors = { + "seed": C.G, + "breach": C.R, + "dork": C.O, + "scrape": C.P, + "hash_crack": C.P, + "pivot": C.CY, + } + + def _print_node(entry: dict, prefix: str, is_last: bool) -> None: + asset = entry["asset"] + qtype = entry["qtype"] + found_in = entry.get("found_in", entry.get("source", "?")) + n_rec = entry["records"] + n_dork = entry["dorks"] + n_sc = entry["scrape"] + cracked = entry.get("cracked") or [] + children = entry.get("children", []) # list of dicts: {asset,qtype,found_in,ref} + + conn = "└─" if is_last else "├─" + fc = phase_colors.get(found_in, C.DM) + tag = f"{fc}[{found_in.upper()}]{C.X}" + hvt_flag = "" + # Check if this asset appears in HVT records + for r in (result.get("hvt_records", []) or []): + if ((_rec_get(r, "email") or _rec_get(r, "username") or "") == asset): + hvt_flag = f" {C.O}⚑HVT{C.X}" + break + + print(f" {prefix}{C.DM}{conn}{C.X} {tag} {C.W}{asset}{C.X} {C.DM}({qtype}){C.X}{hvt_flag}") + cp = prefix + (" " if is_last else "│ ") + + # Stats + stats = [] + if n_rec: stats.append(f"{C.R}{n_rec} breach{C.X}") + if n_dork: stats.append(f"{C.O}{n_dork} dork{C.X}") + if n_sc: stats.append(f"{C.P}{n_sc} scrape{C.X}") + if cracked: stats.append(f"{C.P}cracked→{', '.join(cracked[:2])}{C.X}") + if stats: + print(f" {cp} {C.DM}results:{C.X} {' | '.join(stats)}") + + # Breach records for this asset + key = asset.lower() + asset_recs = recs_by_asset.get(key, []) + cred_recs = [r for r in asset_recs if _rec_get(r, "password")] + other_recs = [r for r in asset_recs if not _rec_get(r, "password")] + for r in cred_recs[:4]: + em = (_rec_get(r, "email") or _rec_get(r, "username") or "—")[:32] + pw = _rec_get(r, "password") or "" + src = _rec_get(r, "source") or "" + rs_r = float(_rec_get(r, "risk_score") or 0) + masked = pw[:2] + "●" * min(len(pw)-2, 6) if len(pw) > 2 else "●●●●" + rc = 
C.R if rs_r >= 70 else C.Y if rs_r >= 40 else C.W + extra = self._record_assets(r) + print(f" {cp} {C.R}breach{C.X} {C.CY}{em}{C.X} {rc}{masked}{C.X} " + f"{C.DM}[{src[:20]}] risk:{rs_r:.0f}{C.X}") + if extra: print(f" {cp} {extra}") + if len(cred_recs) > 4: + print(f" {cp} {C.DM}… +{len(cred_recs)-4} more credentials{C.X}") + for r in other_recs[:2]: + ident = _rec_get(r, "email") or _rec_get(r, "username") or "—" + extra = self._record_assets(r) + src = _rec_get(r, "source") or "" + print(f" {cp} {C.B}asset{C.X} {C.CY}{ident}{C.X} {C.DM}[{src[:20]}]{C.X}") + if extra: print(f" {cp} {extra}") + if len(other_recs) > 2: + print(f" {cp} {C.DM}… +{len(other_recs)-2} more assets{C.X}") + + # Dork hits for this asset + for h in dork_by_asset.get(key, [])[:3]: + url = h.get("url", "")[:70] + dork = h.get("dork", "")[:60] + print(f" {cp} {C.O}dork{C.X} {C.DM}{url or dork}{C.X}") + if url and dork: + print(f" {cp} {C.DM}query: {dork[:60]}{C.X}") + if len(dork_by_asset.get(key, [])) > 3: + print(f" {cp} {C.DM}… +{len(dork_by_asset[key])-3} more dork hits{C.X}") + + # Scrape items for this asset + for cat, item in scrape_by_asset.get(key, [])[:3]: + if cat == "credentials": + print(f" {cp} {C.R}cred{C.X} {item.get('raw','')[:65]}") + elif cat == "telegram": + print(f" {cp} {C.CY}tg{C.X} [{item.get('channel','')}] {item.get('text','')[:55]}") + elif cat == "pastes": + pats = ", ".join(f"{k}({len(v)})" for k,v in (item.get("patterns") or {}).items()) + print(f" {cp} {C.P}paste{C.X} [{item.get('site','')}] {item.get('id','')[:30]} {C.DM}{pats}{C.X}") + elif cat == "dork_misconfigs": + print(f" {cp} {C.O}misc{C.X} {item.get('url', item.get('title',''))[:65]}") + if len(scrape_by_asset.get(key, [])) > 3: + print(f" {cp} {C.DM}… +{len(scrape_by_asset[key])-3} more scrape items{C.X}") + + # Children — show what was discovered and from which phase + if children: + print(f" {cp} {C.DM}↳ reinjected {len(children)} new asset(s):{C.X}") + for ch in children[:8]: + ch_asset = 
ch.get("asset", "") + ch_qt = ch.get("qtype", "") + ch_phase = ch.get("found_in", "?") + ch_ref = ch.get("ref", "")[:55] + ch_color = phase_colors.get(ch_phase, C.DM) + # Show whether this child was itself processed (has a log entry) + processed = "✓" if ch_asset.lower() in log_by_key else "…" + print(f" {cp} {processed} {ch_color}[{ch_phase}]{C.X} " + f"{C.CY}{ch_asset}{C.X} {C.DM}({ch_qt}) ref: {ch_ref}{C.X}") + if len(children) > 8: + print(f" {cp} {C.DM}… +{len(children)-8} more{C.X}") + + # Recurse into child log entries + child_log_entries = [log_by_key[ch["asset"].lower()] + for ch in children + if ch.get("asset","").lower() in log_by_key] + for i, child_entry in enumerate(child_log_entries): + _print_node(child_entry, cp, is_last=(i == len(child_log_entries)-1)) + + roots = [e for e in pivot_log if e["depth"] == 0] + for i, root in enumerate(roots): + _print_node(root, "", is_last=(i == len(roots)-1)) + + def _dork(self, arg: str) -> None: + if not arg: out("warn","No target specified."); return + results = self.orc.dork(arg) + prev = self._last_full or {} + self._last_full = { + "target": arg if not prev.get("target") else prev["target"], + "records": prev.get("records", self._last or []), + "analysis": prev.get("analysis", {}), + "hvt_records": prev.get("hvt_records", []), + "dork_results": results, + "scrape_results": prev.get("scrape_results", {}), + "pivot_chain": prev.get("pivot_chain", [arg]), + "pivot_log": prev.get("pivot_log", []), + "discovered_assets": prev.get("discovered_assets", []), + "scan_meta": prev.get("scan_meta", {}), + } + if not self._last: + self._last = self._last_full["records"] + + W = 62 + print(f"\n {C.O}{'━'*W}{C.X}") + print(f" {C.O} DORK RESULTS{C.X} {C.DM}target: {arg}{C.X}") + print(f" {C.O}{'━'*W}{C.X}") + + if not results: + print(f"\n {C.DM} No results found.{C.X}") + else: + # Group by engine + by_engine: Dict[str, list] = {} + for r in results: + eng = r.get("engine", "Unknown") + by_engine.setdefault(eng, 
def _scrape(self, arg: str) -> None:
    """Run the scrape engine for *arg*, store the result and print a summary.

    The engine result is read as a mapping with the keys ``pastes``,
    ``credentials``, ``hashes``, ``telegram`` and ``dork_misconfigs``.
    It is merged into ``self._last_full`` under ``scrape_results`` so that
    later commands can reuse it; data from a previous scan is kept.

    Args:
        arg: Target typed by the operator. An empty value only prints a
            warning.
    """
    if not arg:
        out("warn","No target specified.")
        return

    # A falsy engine result is treated as "nothing found" so the .get()
    # calls below cannot fail on None.
    results = self.orc.scrape(arg) or {}
    prev = self._last_full or {}

    # Start from a copy of the previous session: keys this method does not
    # know about (for example the "timestamp" printed by the dashboard)
    # must survive a scrape instead of being silently dropped.
    session = dict(prev)
    session.update({
        "target": prev.get("target") or arg,
        "records": prev.get("records", self._last or []),
        "analysis": prev.get("analysis", {}),
        "hvt_records": prev.get("hvt_records", []),
        "dork_results": prev.get("dork_results", []),
        "scrape_results": results,
        "pivot_chain": prev.get("pivot_chain", [arg]),
        "pivot_log": prev.get("pivot_log", []),
        "discovered_assets": prev.get("discovered_assets", []),
        "scan_meta": prev.get("scan_meta", {}),
    })
    self._last_full = session
    if not self._last:
        self._last = session["records"]

    # "or []" also covers keys that are present but hold None.
    pastes = results.get("pastes") or []
    creds = results.get("credentials") or []
    hashes = results.get("hashes") or []
    tg = results.get("telegram") or []
    mc = results.get("dork_misconfigs") or []
    # Hashes are shown in the summary, so they count towards the total;
    # otherwise a hash-only run would be reported as "No results found."
    total = len(pastes) + len(creds) + len(hashes) + len(tg) + len(mc)

    W = 62
    print(f"\n {C.P}{'━'*W}{C.X}")
    print(f" {C.P} SCRAPE RESULTS{C.X} {C.DM}target: {arg}{C.X}")
    print(f" {C.P}{'━'*W}{C.X}")

    # ── Summary row ───────────────────────────────────────────────
    print(f"\n {'Pastes':<20} {C.P}{len(pastes)}{C.X}")
    print(f" {'Credentials':<20} {C.R}{len(creds)}{C.X}")
    print(f" {'Hashes':<20} {C.Y}{len(hashes)}{C.X}")
    print(f" {'Telegram Hits':<20} {C.CY}{len(tg)}{C.X}")
    print(f" {'Misconfigurations':<20} {C.O}{len(mc)}{C.X}")

    # ── Pastes ────────────────────────────────────────────────────
    # Site name -> URL template used to rebuild a link from the paste id.
    _paste_url_tmpl = {
        "Pastebin": "https://pastebin.com/{}", "Rentry": "https://rentry.co/{}",
        "Hastebin": "https://hastebin.com/{}", "DPaste": "https://dpaste.org/{}",
        "Ghostbin": "https://ghostbin.com/paste/{}", "JustPaste": "https://justpaste.it/{}",
        "ControlC": "https://controlc.com/{}", "Paste2": "https://paste2.org/raw/{}",
    }
    if pastes:
        print(f"\n {C.P}┌─ PASTES ({len(pastes)}) {'─'*(W-14)}┐{C.X}")
        for p in pastes[:10]:
            site = p.get("site", "")
            pid = p.get("id", "")
            title = (p.get("title") or pid)[:45]
            pats = ", ".join(f"{k}({len(v)})" for k, v in (p.get("patterns") or {}).items())
            tmpl = _paste_url_tmpl.get(site, "")
            url = tmpl.format(pid) if tmpl and pid else ""
            pat_tag = f" {C.DM}{pats}{C.X}" if pats else ""
            print(f" {C.P}│{C.X} {C.DM}[{site}]{C.X} {title}{pat_tag}")
            if url:
                print(f" {C.P}│{C.X} {C.CY} {url}{C.X}")
        if len(pastes) > 10:
            print(f" {C.P}│{C.X} {C.DM}… and {len(pastes)-10} more{C.X}")
        print(f" {C.P}└{'─'*(W-2)}┘{C.X}")

    # ── Extracted credentials ─────────────────────────────────────
    if creds:
        print(f"\n {C.R}┌─ EXTRACTED CREDENTIALS ({len(creds)}) {'─'*(W-26)}┐{C.X}")
        for c in creds[:12]:
            raw = c.get("raw", "")[:75]
            src = c.get("source", "")
            src_tag = f" {C.DM}[{src}]{C.X}" if src else ""
            print(f" {C.R}│{C.X} {C.R}{raw}{C.X}{src_tag}")
        if len(creds) > 12:
            print(f" {C.R}│{C.X} {C.DM}… and {len(creds)-12} more — use 'export' for the full list{C.X}")
        print(f" {C.R}└{'─'*(W-2)}┘{C.X}")

    # ── Telegram CTI ──────────────────────────────────────────────
    if tg:
        print(f"\n {C.CY}┌─ TELEGRAM CTI ({len(tg)}) {'─'*(W-18)}┐{C.X}")
        for t in tg[:6]:
            ch = t.get("channel", "")
            text = t.get("text", "")[:65]
            pats = ", ".join(f"{k}({len(v)})" for k, v in (t.get("patterns") or {}).items())
            pat_tag = f" {C.DM}{pats}{C.X}" if pats else ""
            print(f" {C.CY}│{C.X} {C.DM}[{ch}]{C.X} {text}{pat_tag}")
        if len(tg) > 6:
            print(f" {C.CY}│{C.X} {C.DM}… and {len(tg)-6} more{C.X}")
        print(f" {C.CY}└{'─'*(W-2)}┘{C.X}")

    # ── Misconfigurations ─────────────────────────────────────────
    if mc:
        print(f"\n {C.O}┌─ MISCONFIGURATIONS ({len(mc)}) {'─'*(W-22)}┐{C.X}")
        for m in mc[:6]:
            title = m.get("title", "")[:55]
            url = m.get("url", "")[:70]
            dork = m.get("dork", "")[:55]
            print(f" {C.O}│{C.X} {C.W}{title}{C.X}")
            if url:
                print(f" {C.O}│{C.X} {C.DM}{url}{C.X}")
            if dork and dork != title:
                print(f" {C.O}│{C.X} {C.DM}dork: {dork}{C.X}")
        if len(mc) > 6:
            print(f" {C.O}│{C.X} {C.DM}… and {len(mc)-6} more{C.X}")
        print(f" {C.O}└{'─'*(W-2)}┘{C.X}")

    if total == 0:
        print(f"\n {C.DM} No results found.{C.X}")

    print(f"\n {C.P}{'━'*W}{C.X}")
    print(f" {C.DM}Use 'export html/pdf/json' to save the full scrape report.{C.X}\n")
def _sources(self) -> None:
    """
    --list-sources / REPL 'sources': debug/operator view.
    Shows every plugin with input_type, confidence, key status, and load errors.

    Creates the source orchestrator on first use, then prints counters
    followed by one table row per loaded source.
    """
    orc = self.orc
    # Ensure the source orchestrator exists before it is queried.
    if orc._source_orchestrator is None:
        orc._source_orchestrator = SourceOrchestrator(
            asyncio.Semaphore(orc.config.concurrency), self.db, orc.config
        )
    registry = orc._source_orchestrator

    # Scan the sources dir directly so files that never became a plugin
    # are still counted.
    json_files = list(SOURCE_DIR.glob("*.json"))
    total_files = len(json_files)

    # Track load failures by attempting to parse each file. Deliberately
    # broad: this is a diagnostic view, so one unreadable file is reported
    # rather than aborting the listing.
    failed: List[str] = []
    for jf in json_files:
        try:
            json.loads(jf.read_text(encoding="utf-8"))
        except Exception as exc:
            failed.append(f"{jf.name}: {exc}")

    registry._ensure_loaded()
    all_sources = (
        registry._nox_sources
        + registry._fs_providers
        + registry._py_providers
    )
    loaded = len(all_sources)
    # Files that parsed but produced no source. Parse failures are shown
    # on their own line, so they are subtracted here, and the value is
    # clamped because `loaded` also includes provider lists.
    # NOTE(review): assumes providers may not map 1:1 to JSON files — confirm.
    skipped = max(0, total_files - len(failed) - loaded)

    W = 62
    print(f"\n {C.G}{'━'*W}{C.X}")
    print(f" {C.G} PLUGIN DEBUG — LOADED SOURCES{C.X}")
    print(f" {C.G}{'━'*W}{C.X}")
    print(f"\n {C.W}Total JSON files in sources/:{C.X} {total_files}")
    print(f" {C.G}Loaded:{C.X} {loaded}")
    if skipped:
        print(f" {C.Y}Skipped (key missing/invalid):{C.X} {skipped}")
    if failed:
        print(f" {C.R}Parse errors:{C.X} {len(failed)}")
    print()

    if not all_sources:
        out("err", "No plugins loaded. Run: python build_sources.py")
        return

    # Column header
    print(f" {C.DM}{'#':>3} {'NAME':<28} {'INPUT':<10} {'CONF':>5} {'KEY STATUS'}{C.X}")
    print(f" {C.DM}{'─'*3} {'─'*28} {'─'*10} {'─'*5} {'─'*30}{C.X}")

    for i, src in enumerate(all_sources, 1):
        defn = getattr(src, "_def", {}) or {}
        name = src.name
        input_type = defn.get("input_type", "any")
        conf = defn.get("confidence", "")
        # Format ints like floats so the column is uniform; bool is
        # excluded because it is a subclass of int.
        if isinstance(conf, (int, float)) and not isinstance(conf, bool):
            conf_str = f"{conf:.2f}"
        else:
            conf_str = str(conf) if conf else " — "

        # Key status
        slots = defn.get("api_key_slots", [])
        key_name = (defn.get("required_api_key_name", "")
                    or (slots[0].strip("{}") if slots else ""))
        needs_key = getattr(src, "needs_key", bool(key_name))

        api_key = ""
        if not needs_key:
            key_col = f"{C.G}public (no key){C.X}"
        else:
            api_key = getattr(src, "_api_key", "") or ""
            if api_key:
                # Show a suffix only for keys of 8+ characters: the last
                # four characters of a 4-character key are the whole key.
                masked = f"****{api_key[-4:]}" if len(api_key) >= 8 else "****"
                key_col = f"{C.G}configured ({masked}){C.X}"
            else:
                key_col = f"{C.R}NOT configured [{key_name}]{C.X}"

        # Colour name by key status
        name_col = (C.G if (not needs_key or api_key) else C.Y) + f"{name:<28}" + C.X
        print(f" {C.DM}{i:>3}.{C.X} {name_col} {C.DM}{input_type:<10}{C.X} {C.CY}{conf_str:>5}{C.X} {key_col}")

    # Parse errors detail
    if failed:
        print(f"\n {C.R}Parse errors:{C.X}")
        for err in failed:
            print(f" {C.R}✗{C.X} {err}")

    print(f"\n {C.DM}Tip: set keys directly in ~/.config/nox-cli/apikeys.json (chmod 0600).{C.X}")
    print(f" {C.G}{'━'*W}{C.X}\n")
Run a scan, dork, or scrape first."); return + parts = arg.split() if arg else [] + fmt = None + remaining = [] + i = 0 + while i < len(parts): + if parts[i] == "--format" and i + 1 < len(parts): + fmt = parts[i + 1]; i += 2 + elif parts[i].startswith("--format="): + fmt = parts[i].split("=", 1)[1]; i += 1 + else: + remaining.append(parts[i]); i += 1 + _known = {"json", "csv", "html", "md", "pdf"} + if fmt is None and remaining and remaining[0].lower() in _known: + fmt = remaining.pop(0) + fmt = (fmt or "html").lower() + path = remaining[0] if remaining else f"nox_report_{int(time.time())}.{fmt}" + data = full if isinstance(full, dict) and ("records" in full or has_dork or has_scrape) \ + else {"target": "unknown", "records": self._last} + # Ensure records key always present + if "records" not in data: + data = dict(data); data["records"] = self._last + inv = self.session_state.get("investigator_id", "NOX-AUTO") + if fmt == "json": Reporter.to_json(data, path) + elif fmt == "csv": + resolved = Reporter._resolve_path(path, "csv") + Reporter.to_csv(self._last, resolved) + # G4: derive base from the resolved (absolute) path so companion files + # land in REPORT_DIR, not the current working directory + self._export_csv_extras(data, resolved) + elif fmt == "html": Reporter.to_html(data, path) + elif fmt == "md": Reporter.to_markdown(data, path) + elif fmt == "pdf": Reporter.to_pdf(data, path, investigator_id=inv) + else: out("warn", f" Unknown format: {fmt}. 
@staticmethod
def _export_csv_extras(data: dict, base_path: str) -> None:
    """Write dork and scrape results as companion CSV files alongside the main breach CSV.

    Each non-empty result list in *data* is written to
    ``<base>_<suffix>.csv``, where ``<base>`` is *base_path* without its
    file extension. Dictionary keys outside the listed columns are ignored.

    Args:
        data: Export payload; reads ``dork_results``, ``scrape_results``
            (``pastes``, ``credentials``, ``telegram``, ``dork_misconfigs``)
            and ``discovered_assets``.
        base_path: Path of the main CSV file.
    """
    import csv as _csv
    import os as _os

    # splitext only strips a real file extension. rsplit(".", 1) would cut
    # the path at a dot inside a directory name (e.g. "~/.nox/report").
    base = _os.path.splitext(base_path)[0]
    scrape = data.get("scrape_results") or {}

    # (rows, file suffix, CSV columns, label used in the confirmation line)
    exports = (
        (data.get("dork_results"), "dorks",
         ["url", "title", "snippet", "dork", "engine"], "Dork results"),
        (scrape.get("pastes"), "pastes",
         ["site", "id", "title", "query"], "Pastes"),
        (scrape.get("credentials"), "scraped_creds",
         ["raw", "source", "paste_id"], "Scraped credentials"),
        (scrape.get("telegram"), "telegram",
         ["channel", "text"], "Telegram hits"),
        (scrape.get("dork_misconfigs"), "misconfigs",
         ["url", "title", "dork"], "Misconfigurations"),
        (data.get("discovered_assets"), "discovered_assets",
         ["asset", "qtype", "phase", "ref", "parent", "depth"], "Discovered assets"),
    )

    for rows, suffix, columns, label in exports:
        if not rows:
            continue  # nothing collected for this category: write no file
        target = f"{base}_{suffix}.csv"
        with open(target, "w", newline="", encoding="utf-8") as fh:
            writer = _csv.DictWriter(fh, fieldnames=columns, extrasaction="ignore")
            writer.writeheader()
            writer.writerows(rows)
        out("ok", f"{label} CSV saved: {target}")
@staticmethod
def _risk_badge(analysis: dict) -> str:
    """Return a coloured severity tag for an analysis summary.

    ``[CRITICAL]`` when the risk score is above 60 or any critical finding
    is counted, ``[HIGH]`` when the score is above 30 or any high finding
    is counted, otherwise ``[MEDIUM]``. A falsy *analysis* is treated as a
    score of 0 with no findings.
    """
    if analysis:
        score = analysis.get("risk_score", 0)
        severity = analysis.get("severity", {})
    else:
        score, severity = 0, {}

    if score > 60 or severity.get("critical", 0) > 0:
        colour, label = C.R, "[CRITICAL]"
    elif score > 30 or severity.get("high", 0) > 0:
        colour, label = C.Y, "[HIGH]"
    else:
        colour, label = C.G, "[MEDIUM]"
    return f"{colour}{label}{C.W}"
Run a scan, dork, or scrape first."); return + if self._last is None: + self._last = [] + + full = self._last_full or {} + target = full.get("target", "unknown") + analysis = full.get("analysis") or {} + badge = self._risk_badge(analysis) + W = 62 + + print(f"\n {C.G}{'━'*W}{C.X}") + print(f" {C.G} NOX INTELLIGENCE REPORT{C.X} {badge}") + print(f" {C.G}{'━'*W}{C.X}") + ts = full.get("timestamp") or "" + print(f" Target : {C.BD}{target}{C.X}") + if ts: + print(f" Timestamp: {C.DM}{ts}{C.X}") + + rs = analysis.get("risk_score", 0) + sev = analysis.get("severity", {}) + col = C.R if rs > 60 else C.Y if rs > 30 else C.G + print(f"\n {C.Y}[ EXECUTIVE SUMMARY ]{C.X}") + + scan_meta = full.get("scan_meta", {}) or {} + pivot_depth = scan_meta.get("pivot_depth", 0) + nodes = scan_meta.get("nodes_discovered", 0) + elapsed = scan_meta.get("elapsed_seconds") + dork_count = len(full.get("dork_results", []) or []) + scrape_r = full.get("scrape_results", {}) or {} + paste_cnt = len(scrape_r.get("pastes", [])) + cred_sc_cnt = len(scrape_r.get("credentials", [])) + tg_cnt = len(scrape_r.get("telegram", [])) + mc_cnt = len(scrape_r.get("dork_misconfigs", [])) + + print(f" Records : {analysis.get('total_records', len(self._last or []))}" + f" {C.DM}({analysis.get('unique_records',0)} unique){C.X}") + print(f" Unique Emails : {analysis.get('unique_emails', 0)}") + print(f" Passwords Found : {C.R}{analysis.get('passwords_found', 0)}{C.X}") + print(f" Stealer Logs : {C.R}{analysis.get('stealer_logs', 0)}{C.X}") + print(f" HVT Accounts : {C.O}{analysis.get('hvt_count', 0)}{C.X}") + if dork_count: print(f" Dork Hits : {C.O}{dork_count}{C.X}") + if paste_cnt: print(f" Pastes : {C.P}{paste_cnt}{C.X}") + if cred_sc_cnt: print(f" Scraped Creds : {C.R}{cred_sc_cnt}{C.X}") + if tg_cnt: print(f" Telegram Hits : {C.CY}{tg_cnt}{C.X}") + if mc_cnt: print(f" Misconfigs : {C.O}{mc_cnt}{C.X}") + if nodes: print(f" Nodes Discovered : {nodes}") + if pivot_depth: print(f" Pivot Depth : {pivot_depth}") + 
if elapsed is not None: print(f" Scan Duration : {elapsed:.1f}s") + da_cnt = len(full.get("discovered_assets", []) or []) + if da_cnt: print(f" Reinjected Assets: {C.CY}{da_cnt}{C.X}") + print(f" Risk Score : {col}{rs}/100{C.X}") + print(f" Severity : {C.R}{sev.get('critical',0)} CRIT{C.X} " + f"{C.Y}{sev.get('high',0)} HIGH{C.X} {sev.get('medium',0)} MED") + + # Pivot chain — prefer the one from the fullscan result (avalanche order) + pivot_log = full.get("pivot_log", []) + chain = full.get("pivot_chain") or self.session_state.get("pivot_chain", []) + + if pivot_log: + print(f"\n {C.Y}[ PIVOT TREE ({len(pivot_log)} nodes) ]{C.X}") + self._print_pivot_tree(pivot_log, full) + # Show discovered assets after pivot tree + discovered_assets = full.get("discovered_assets", []) or [] + if discovered_assets: + _phase_col = {"breach": C.R, "dork": C.O, "scrape": C.P, + "hash_crack": C.P, "seed": C.G} + print(f"\n {C.Y}[ DISCOVERED ASSETS ({len(discovered_assets)} new identifiers) ]{C.X}") + print(f" {C.DM} {'ASSET':<38} {'TYPE':<10} {'PHASE':<10} REFERENCE{C.X}") + for da in discovered_assets[:30]: + pc = _phase_col.get(da["phase"], C.DM) + ref = da.get("ref", "")[:55] + print(f" {C.CY} {da['asset']:<38}{C.X} {C.DM}{da['qtype']:<10}{C.X} " + f"{pc}{da['phase']:<10}{C.X} {C.DM}{ref}{C.X}") + if len(discovered_assets) > 30: + print(f" {C.DM} … and {len(discovered_assets)-30} more — use 'export'{C.X}") + else: + # No pivot log — flat display + if len(chain) > 1: + print(f"\n {C.Y}[ PIVOT CHAIN ({len(chain)} nodes) ]{C.X}") + for i, node in enumerate(chain[:20]): + pfx = " " if i == 0 else " ↳ " + print(f" {C.DM}{pfx}{C.X}{C.CY}{node}{C.X}") + if len(chain) > 20: + print(f" {C.DM} … and {len(chain)-20} more nodes{C.X}") + + hvt = full.get("hvt_records", []) + if hvt: + print(f"\n {C.Y}[ HIGH-VALUE TARGETS ]{C.X}") + for r in hvt[:8]: + ident = _rec_get(r, "email") or _rec_get(r, "username") or "—" + rs_r = _rec_get(r, "risk_score") or "" + rs_tag = f" {C.Y}risk:{rs_r}{C.X}" if 
rs_r else "" + print(f" {C.R}⚑{C.X} {ident}{rs_tag}") + + creds = [(r, _rec_get(r, "password")) for r in self._last if _rec_get(r, "password")] + other_assets = [r for r in self._last if not _rec_get(r, "password") and + (_rec_get(r, "email") or _rec_get(r, "username") or + _rec_get(r, "ip_address") or _rec_get(r, "phone"))] + if creds: + print(f"\n {C.Y}[ EXPOSED CREDENTIALS ]{C.X}") + for r, pw in creds[:10]: + em = _rec_get(r, "email") or _rec_get(r, "username") or "—" + src = _rec_get(r, "source") or "" + masked = pw[:2] + "●" * min(len(pw) - 2, 8) if len(pw) > 2 else "●●●●" + print(f" {C.R}→{C.X} {C.CY}{em}{C.X} {C.R}{masked}{C.X} {C.DM}[{src}]{C.X}") + extra = REPL._record_assets(r) + if extra: print(f" {extra}") + if len(creds) > 10: + print(f" {C.DM} … and {len(creds)-10} more — use 'export'{C.X}") + if other_assets: + print(f"\n {C.Y}[ DISCOVERED ASSETS ({len(other_assets)}) ]{C.X}") + for r in other_assets[:15]: + ident = _rec_get(r, "email") or _rec_get(r, "username") or "—" + src = _rec_get(r, "source") or "" + print(f" {C.Y}→{C.X} {C.CY}{ident}{C.X} {C.DM}← {src}{C.X}") + extra = REPL._record_assets(r) + if extra: print(f" {extra}") + if len(other_assets) > 15: + print(f" {C.DM} … and {len(other_assets)-15} more — use 'export'{C.X}") + + reused = analysis.get("reused_passwords", {}) + if reused: + print(f"\n {C.Y}[ PASSWORD REUSE ]{C.X}") + for pw, cnt in list(reused.items())[:5]: + masked = pw[:2] + "●" * (len(pw) - 2) if len(pw) > 2 else "●●●●" + print(f" {C.R}⚠{C.X} {masked} → reused {cnt}× across breaches") + + dorks = full.get("dork_results", []) + if dorks: + print(f"\n {C.Y}[ DORK FINDINGS ({len(dorks)}) ]{C.X}") + for d in dorks[:5]: + url = d.get("url", "") or d.get("title", "") + dork_q = d.get("dork", "")[:50] + print(f" {C.Y}→{C.X} {C.DM}{url[:70]}{C.X}") + if dork_q: print(f" {C.DM}dork: {dork_q}{C.X}") + if len(dorks) > 5: + print(f" {C.DM} … and {len(dorks)-5} more — use 'export'{C.X}") + + scrape = full.get("scrape_results", {}) or {} + 
def _pivot(self, arg: str) -> None:
    """Start a full scan on an identifier taken from a stored record.

    Args:
        arg: 1-based index into ``self._last``. The first non-empty value
            among the record's email, username, phone and domain becomes
            the new scan target.
    """
    if not self._last:
        out("warn", "No results loaded. Run a scan first.")
        return

    choice = (arg or "").strip()
    # isdecimal(), not isdigit(): isdigit() also accepts characters such as
    # "²", which int() then rejects with ValueError.
    if not choice.isdecimal():
        out("warn", "Usage: pivot N (see [pivot N] hints in graph output)")
        return

    idx = int(choice) - 1
    if not (0 <= idx < len(self._last)):
        out("warn", f"Index out of range. Valid: 1–{len(self._last)}")
        return

    r = self._last[idx]
    seed = (_rec_get(r, "email") or _rec_get(r, "username") or
            _rec_get(r, "phone") or _rec_get(r, "domain") or "")
    if not seed:
        out("warn", "Selected record has no pivotable identifier.")
        return

    out("pivot", f"Pivoting → async fullscan on: {C.CY}{seed}{C.X}")
    self._fullscan(seed)
enumerate(chain): + arrow = " " if i == 0 else " ↳ " + print(f" {C.DM}{arrow}{C.X}{C.CY}{t}{C.X}") + print() + + # Central target node + print(f" {C.G}◉{C.X} {C.BD}{target}{C.X}") + + # Linked data branches + groups = [ + ("Emails", sorted(emails)[:8], C.CY), + ("Phones", sorted(phones)[:6], C.CY), + ("Usernames", sorted(usernames)[:6], C.G), + ("Addresses", sorted(addresses)[:4], C.Y), + ("Passwords", sorted(passwords)[:5], C.R), + ] + active_groups = [(lbl, vals, col) for lbl, vals, col in groups if vals] + + for gi, (label, values, color) in enumerate(active_groups): + is_last_group = (gi == len(active_groups) - 1) + grp_pfx = " └─" if is_last_group else " ├─" + cont_pfx = " " if is_last_group else " │ " + print(f" {C.DM}{grp_pfx}{C.X} {C.P}[{label}]{C.X}") + for vi, v in enumerate(values): + is_last_val = (vi == len(values) - 1) + val_pfx = f"{cont_pfx} └─" if is_last_val else f"{cont_pfx} ├─" + src_tag = f" {C.DM}← {source_map.get(v,'')[:20]}{C.X}" if source_map.get(v) else "" + # Mark as pivot point if it appears in scanned targets + pivot_tag = f" {C.Y}[PIVOT]{C.X}" if v in scanned else "" + print(f" {C.DM}{val_pfx}{C.X} {color}{v}{C.X}{src_tag}{pivot_tag}") + + # ── Dork results branch ─────────────────────────────────────── + full_data = self._last_full or {} + dork_results = full_data.get("dork_results", []) or [] + if dork_results: + print(f"\n {C.Y}◈ Dork Findings ({len(dork_results)}){C.X}") + for d in dork_results[:8]: + title = d.get("title","") or d.get("dork","") + url = d.get("url","") + print(f" {C.DM} ├─{C.X} {C.O}{title[:60]}{C.X}") + if url: + print(f" {C.DM} │ {url[:70]}{C.X}") + if len(dork_results) > 8: + print(f" {C.DM} └─ … and {len(dork_results)-8} more{C.X}") + + # ── Scrape results branch ───────────────────────────────────── + scrape_results = full_data.get("scrape_results", {}) or {} + pastes = scrape_results.get("pastes", []) + creds_sc = scrape_results.get("credentials", []) + tg_hits = scrape_results.get("telegram", []) + mc_hits 
= scrape_results.get("dork_misconfigs", []) + if pastes or creds_sc or tg_hits or mc_hits: + total_scrape = len(pastes) + len(creds_sc) + len(tg_hits) + len(mc_hits) + print(f"\n {C.P}◈ Scrape Findings ({total_scrape}){C.X}") + if pastes: + print(f" {C.DM} ├─{C.X} {C.P}[Pastes: {len(pastes)}]{C.X}") + for p in pastes[:3]: + print(f" {C.DM} │ ├─{C.X} [{p.get('site','')}] {p.get('id','')[:40]}") + if len(pastes) > 3: + print(f" {C.DM} │ └─ … and {len(pastes)-3} more{C.X}") + if creds_sc: + print(f" {C.DM} ├─{C.X} {C.R}[Credentials: {len(creds_sc)}]{C.X}") + for c in creds_sc[:3]: + print(f" {C.DM} │ ├─{C.X} {C.R}{c.get('raw','')[:60]}{C.X}") + if len(creds_sc) > 3: + print(f" {C.DM} │ └─ … and {len(creds_sc)-3} more{C.X}") + if tg_hits: + print(f" {C.DM} ├─{C.X} {C.CY}[Telegram: {len(tg_hits)}]{C.X}") + for t in tg_hits[:3]: + print(f" {C.DM} │ ├─{C.X} {C.CY}[{t.get('channel','')}]{C.X} {t.get('text','')[:50]}") + if len(tg_hits) > 3: + print(f" {C.DM} │ └─ … and {len(tg_hits)-3} more{C.X}") + if mc_hits: + print(f" {C.DM} └─{C.X} {C.O}[Misconfigs: {len(mc_hits)}]{C.X}") + for m in mc_hits[:3]: + print(f" {C.DM} ├─{C.X} {C.O}{m.get('title','')[:60]}{C.X}") + if len(mc_hits) > 3: + print(f" {C.DM} └─ … and {len(mc_hits)-3} more{C.X}") + + # ── Discovered / reinjected assets branch ──────────────────── + discovered_assets = full_data.get("discovered_assets", []) or [] + if discovered_assets: + _phase_col = {"breach": C.R, "dork": C.O, "scrape": C.P, "hash_crack": C.P} + print(f"\n {C.B}◈ Reinjected Assets ({len(discovered_assets)}){C.X}") + for da in discovered_assets[:12]: + pc = _phase_col.get(da["phase"], C.DM) + ref = da.get("ref", "")[:50] + print(f" {C.DM} ├─{C.X} {pc}[{da['phase']}]{C.X} " + f"{C.CY}{da['asset']}{C.X} {C.DM}({da['qtype']}) ← {ref}{C.X}") + if len(discovered_assets) > 12: + print(f" {C.DM} └─ … and {len(discovered_assets)-12} more — use 'export'{C.X}") + + print(f"\n {C.G}{'━'*W}{C.X}") + print(f" {C.DM}Targets scanned: {len(scanned)} | " + 
@staticmethod
def _record_assets(r: Any) -> str:
    """Return a compact string of every non-empty asset field in a record.

    Covers ip, phone, domain, name and address, then the password hash
    (only when the record has no plaintext password) and up to three
    entries of ``data_types``.

    Args:
        r: A record readable through ``_rec_get``.

    Returns:
        The space-joined parts, or ``""`` when the record has none.
    """
    parts = []
    for label, key in [("ip", "ip_address"), ("phone", "phone"),
                       ("domain", "domain"), ("name", "full_name"),
                       ("addr", "address")]:
        v = _rec_get(r, key)
        if v:
            parts.append(f"{C.DM}{label}:{C.X}{v}")

    ph = _rec_get(r, "password_hash")
    ht = _rec_get(r, "hash_type")
    if ph and not _rec_get(r, "password"):
        digest = str(ph)  # tolerate non-string hash values before slicing
        # Append the ellipsis only when the hash was actually shortened.
        shown = digest[:20] + ("…" if len(digest) > 20 else "")
        parts.append(f"{C.DM}hash[{ht or '?'}]:{C.X}{shown}")

    dt = _rec_get(r, "data_types") or []
    if isinstance(dt, list) and dt:
        # str() per item: join() raises TypeError on non-string entries.
        parts.append(f"{C.DM}[{', '.join(str(t) for t in dt[:3])}]{C.X}")
    return " ".join(parts)
Total Records: {a.get('total_records',0)}") + print(f" Unique (deduped): {a.get('unique_records',a.get('total_records',0))}") + print(f" Unique Emails: {a.get('unique_emails',0)}") + print(f" Passwords Found: {C.R}{a.get('passwords_found',0)}{C.W}") + print(f" Stealer Logs: {C.R}{a.get('stealer_logs',0)}{C.W}") + print(f" High-Value Targets: {C.O}{a.get('hvt_count',0)}{C.W}") + print(f" Password Reuse: {len(a.get('reused_passwords',{}))}") + print(f" Avg Persistence Score: {a.get('avg_persistence',0.0)}") + # Show dork/scrape counts if available (autoscan) + full = self._last_full or {} + dork_count = len(full.get("dork_results", []) or []) + scrape = full.get("scrape_results", {}) or {} + paste_count = len(scrape.get("pastes", [])) + cred_count = len(scrape.get("credentials", [])) + tg_count = len(scrape.get("telegram", [])) + mc_count = len(scrape.get("dork_misconfigs", [])) + if dork_count: + print(f" Dork Hits: {C.O}{dork_count}{C.W}") + if paste_count or cred_count or tg_count or mc_count: + print(f" Scraped Pastes: {C.P}{paste_count}{C.W}") + if cred_count: print(f" Scraped Credentials: {C.R}{cred_count}{C.W}") + if tg_count: print(f" Telegram Hits: {C.CY}{tg_count}{C.W}") + if mc_count: print(f" Misconfigurations: {C.O}{mc_count}{C.W}") + rs = a.get("risk_score",0) + col = C.R if rs > 60 else C.Y if rs > 30 else C.G + print(f" Risk Score: {col}{rs}/100{C.W}") + sev = a.get("severity",{}) + print(f"\n Severity: {C.R}■ {sev.get('critical',0)} CRITICAL{C.W} {C.Y}■ {sev.get('high',0)} HIGH{C.W} ■ {sev.get('medium',0)} MEDIUM") + profiles = a.get("profiles",[]) + if profiles: + max_stuffing = max((p.get("stuffing_risk","LOW") for p in profiles), key=lambda x: {"LOW":0,"MEDIUM":1,"HIGH":2,"CRITICAL":3}.get(x,0), default="LOW") + col = C.R if max_stuffing=="CRITICAL" else C.Y if max_stuffing in ("HIGH","MEDIUM") else C.G + print(f" Credential Stuffing: {col}{max_stuffing}{C.W}") + reused = a.get("reused_passwords",{}) + if reused: + print(f"\n {C.R}Password Reuse 
# =======================================================================
# 1. API & SECRETS MANAGEMENT
# =======================================================================
import configparser as _configparser


class ConfigManager:
    """
    Unified API key manager — delegates to sources/helpers/config_handler.py
    (XDG JSON store at ~/.config/nox-cli/apikeys.json) when available,
    with a legacy config.ini fallback.

    Resolution order implemented by get():
        external handler → env-var (KEY, then NOX_KEY) → config.ini → ''

    NOTE(review): the external handler may apply its own lookup order
    internally; that is not visible from this class.
    """

    # Process-wide cache of resolved keys, shared by all callers.
    _cache: Dict[str, str] = {}
    _INI_PATHS = [HOME_NOX / "config.ini", Path("/etc/nox/config.ini")]
    # B4: track apikeys.json mtime to detect external edits
    _store_mtime: float = 0.0

    @staticmethod
    def _new_parser() -> "_configparser.ConfigParser":
        """Return a parser with interpolation off so '%' in a key is stored and read literally."""
        return _configparser.ConfigParser(interpolation=None)

    @classmethod
    def _invalidate_if_changed(cls) -> None:
        """B4: clear cache if apikeys.json was modified externally."""
        if not _HAS_CONFIG_HANDLER or _ExtConfigManager is None:
            return
        try:
            from sources.helpers.config_handler import _APIKEYS_FILE  # type: ignore
            if _APIKEYS_FILE and _APIKEYS_FILE.exists():
                mtime = _APIKEYS_FILE.stat().st_mtime
                if mtime != cls._store_mtime:
                    cls._cache.clear()
                    cls._store_mtime = mtime
                    if _ExtConfigManager._store is not None:
                        _ExtConfigManager._store = None
                        _ExtConfigManager._cache.clear()
        except Exception as exc:
            # Best effort: a failed freshness check must never break key
            # lookup, but it is logged instead of vanishing.
            logger.debug("ConfigManager: cache invalidation skipped: %s", exc)

    @classmethod
    def _read_ini(cls, key_name: str) -> str:
        """Return *key_name* from the first legacy config.ini that defines it, else ''."""
        for p in cls._INI_PATHS:
            if not p.exists():
                continue
            cfg = cls._new_parser()
            try:
                cfg.read(str(p), encoding="utf-8")
            except (_configparser.Error, UnicodeDecodeError) as exc:
                # A malformed file must not take the whole lookup down.
                logger.debug("ConfigManager: cannot parse %s: %s", p, exc)
                continue
            val = cfg.get("api_keys", key_name, fallback="")
            if val:
                return val
        return ""

    @classmethod
    def get(cls, key_name: str) -> str:
        """Resolve *key_name* to its value, or '' when it is not configured.

        The result, including an empty one, is cached until the external
        key store changes on disk.
        """
        cls._invalidate_if_changed()
        if key_name in cls._cache:
            return cls._cache[key_name]
        # 1. Delegate to external handler (XDG JSON store)
        if _HAS_CONFIG_HANDLER and _ExtConfigManager is not None:
            val = _ExtConfigManager.get(key_name)
            if val:
                cls._cache[key_name] = val
                return val
        # 2. Environment variable
        val = os.environ.get(key_name) or os.environ.get(f"NOX_{key_name}", "")
        # 3. Legacy config.ini
        if not val:
            val = cls._read_ini(key_name)
        # A template placeholder counts as "not configured".
        if val == UNIVERSAL_PLACEHOLDER:
            val = ""
        cls._cache[key_name] = val
        return val

    @classmethod
    def write(cls, key_name: str, value: str) -> None:
        """Persist a key — prefers the XDG JSON store, falls back to config.ini."""
        if _HAS_CONFIG_HANDLER and _ExtConfigManager is not None:
            _ExtConfigManager.set(key_name, value)
            cls._cache[key_name] = value
            return
        # Legacy: write to config.ini
        _write_path = HOME_NOX / "config.ini"
        _write_path.parent.mkdir(parents=True, exist_ok=True)
        cfg = cls._new_parser()
        if _write_path.exists():
            cfg.read(str(_write_path), encoding="utf-8")
        if "api_keys" not in cfg:
            cfg["api_keys"] = {}
        cfg["api_keys"][key_name] = value
        # The file holds secrets: create it owner-only. The mode passed to
        # os.open applies only when the file is new.
        fd = os.open(str(_write_path), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w", encoding="utf-8") as fh:
            cfg.write(fh)
        try:
            # Tighten a file that already existed with wider permissions.
            os.chmod(str(_write_path), 0o600)
        except OSError as exc:
            logger.debug("ConfigManager: chmod failed for %s: %s", _write_path, exc)
        cls._cache[key_name] = value
+ """ + + _SOURCES_DIR = SOURCE_DIR + + def __init__(self, semaphore: asyncio.Semaphore, db: "DB", config: "NoxConfig", + definition: dict) -> None: + super().__init__(semaphore, db, config) + self._def = definition + self.name = definition.get("name", "JSONSource") + env_key = definition.get("api_key_env", "") + self._api_key = ConfigManager.get(env_key) if env_key else "" + self.needs_key = bool(env_key) + self.ok_email = self.ok_user = self.ok_domain = self.ok_phone = True + + async def async_search(self, session, query: str, qtype: str) -> List[Record]: + if self.needs_key and not self._api_key: + logger.debug("JSONSourceLoader[%s]: API key missing, skipping.", self.name) + return [] + try: + return await self._fetch(session, query) + except Exception as exc: + logger.debug("JSONSourceLoader[%s]: %s", self.name, exc) + return [] + + async def _fetch(self, session, query: str) -> List[Record]: + d = self._def + url = d["url"].replace("{query}", urllib.parse.quote(query, safe="")).replace("{api_key}", self._api_key) + headers = {k: v.replace("{api_key}", self._api_key) for k, v in d.get("headers", {}).items()} + method = d.get("method", "GET").upper() + payload = {k: v.replace("{query}", query).replace("{api_key}", self._api_key) + for k, v in d.get("payload", {}).items()} + + if method == "POST": + status, text, _ = await self._post(session, url, json_data=payload or None, + data=payload if not payload else None, + headers=headers) + else: + status, text, _ = await self._get(session, url, headers=headers) + + if status not in range(200, 300) or not text: + return [] + + ext = d.get("extract", {}) + mode = ext.get("mode", "json") + if mode == "regex": + return self._extract_regex(text, ext, query) + return self._extract_json(text, ext, query) + + def _extract_json(self, text: str, ext: dict, query: str) -> List[Record]: + try: + data = json.loads(text) + except Exception: + return [] + # Navigate to root list + root_path = ext.get("root", "") + for key in 
(root_path.split(".") if root_path else []): + if isinstance(data, dict): + data = data.get(key, []) + if not isinstance(data, list): + data = [data] if isinstance(data, dict) else [] + records = [] + for item in data[:100]: + if not isinstance(item, dict): + continue + records.append(self._rec( + email = str(item.get(ext.get("email", "email"), "") or ""), + password = str(item.get(ext.get("password", "password"), "") or ""), + username = str(item.get(ext.get("username", "username"), "") or ""), + phone = str(item.get(ext.get("phone", "phone"), "") or ""), + password_hash = str(item.get(ext.get("hash", "hash"), "") or ""), + breach_name = self.name, + data_types = [self.name, "Credentials"], + raw_data = item, + )) + return records + + def _extract_regex(self, text: str, ext: dict, query: str) -> List[Record]: + field_patterns = {f: ext[f] for f in ("email","password","username","phone","hash") if f in ext} + # Find all matches per field + field_values: Dict[str, List[str]] = {} + for fname, pattern in field_patterns.items(): + field_values[fname] = re.findall(pattern, text) + # Zip into records (align by index) + max_len = max((len(v) for v in field_values.values()), default=0) + records = [] + for i in range(min(max_len, 100)): + records.append(self._rec( + email = field_values.get("email", [""])[i] if i < len(field_values.get("email", [])) else "", + password = field_values.get("password", [""])[i] if i < len(field_values.get("password", [])) else "", + username = field_values.get("username", [""])[i] if i < len(field_values.get("username", [])) else "", + phone = field_values.get("phone", [""])[i] if i < len(field_values.get("phone", [])) else "", + password_hash = field_values.get("hash", [""])[i] if i < len(field_values.get("hash", [])) else "", + breach_name = self.name, + data_types = [self.name, "Credentials"], + )) + return records + + @classmethod + def load_all(cls, semaphore: asyncio.Semaphore, db: "DB", config: "NoxConfig") -> 
class DeHashEngine:
    """
    Queries MD5/SHA1 hashes found during scans against de-hashing APIs.
    Requires DEHASHED_API_KEY (email:api_key format) or DEHASH_API_KEY.
    Gracefully skips if key is absent.
    """

    def __init__(self, db: "DB", config: "NoxConfig") -> None:
        self._db = db
        self._config = config
        # First configured source wins: ConfigManager keys, then the DB key store.
        self._key = (ConfigManager.get("DEHASHED_API_KEY")
                     or ConfigManager.get("DEHASH_API_KEY")
                     or db.get_key("dehashed"))

    async def dehash_records(self, session, records: List["Record"]) -> List["Record"]:
        """Fill in plaintext passwords for records that only carry a hash.

        At most 20 distinct hashes are looked up per call, 5 concurrently.
        Records are mutated in place and the same list is returned. A record
        is only touched when it has no plaintext yet AND the lookup actually
        produced one, so failed lookups never blank or re-tag a record.
        """
        if not self._key:
            return records
        hashes = {r.password_hash for r in records if r.password_hash and not r.password}
        if not hashes:
            return records
        sem = asyncio.Semaphore(5)
        tasks = [self._lookup(session, sem, h) for h in list(hashes)[:20]]
        results = await asyncio.gather(*tasks, return_exceptions=True)
        # Keep successful lookups only: _lookup reports a miss as (hash, "").
        crack_map: Dict[str, str] = {
            res[0]: res[1]
            for res in results
            if isinstance(res, tuple) and res[1]
        }
        for r in records:
            if r.password:
                continue  # never overwrite plaintext that was already known
            plain = crack_map.get(r.password_hash)
            if plain:
                r.password = plain
                r.data_types = list(set(r.data_types + ["DeHashed"]))
        return records

    async def _lookup(self, session, sem: asyncio.Semaphore, h: str):
        """Resolve one hash; return ``(hash, plaintext)`` with '' on a miss.

        The local DB cache is consulted first; a remote hit is stored back
        into it. Network and parsing errors are logged at debug level and
        reported as a miss.
        """
        cached = self._db.get_plain(h)
        if cached:
            return (h, cached)
        try:
            # "email:api_key" credentials are sent base64-encoded; a bare key as-is.
            auth = base64.b64encode(self._key.encode()).decode() if ":" in self._key else self._key
            url = f"https://api.dehashed.com/search?query=hashed_password:{h}&size=1"
            hdrs = {"Accept": "application/json", "Authorization": f"Basic {auth}"}
            async with sem:
                to = aiohttp_mod.ClientTimeout(total=self._config.timeout) if aiohttp_mod else None
                async with session.get(url, headers=hdrs, timeout=to, ssl=_SSL_CTX) as resp:
                    if resp.status == 200:
                        data = await resp.json()
                        # "entries" may be null when nothing matched.
                        for entry in data.get("entries") or []:
                            pw = entry.get("password", "")
                            if pw:
                                self._db.store_hash(h, "unknown", pw, "DeHashed")
                                return (h, pw)
        except Exception as exc:
            logger.debug("DeHashEngine._lookup %s: %s", h[:16], exc)
        return (h, "")
def _pdf_report(data: dict, path: str) -> None:
    """
    Generate a professional PDF report using fpdf2.
    Layout: Title Page → Executive Summary → Entities Table → Raw Evidence.
    Falls back gracefully if fpdf2 is not installed.

    Only the built-in Helvetica/Courier fonts are used, and those can encode
    latin-1 text only. Every value that originates from scan data is therefore
    passed through ``_pdf_safe`` and the fixed labels are plain ASCII, so one
    exotic character in a record cannot abort the whole report.

    Args:
        data: report payload; reads ``data["records"]`` and ``data["target"]``.
        path: destination file for the PDF.
    """
    try:
        from fpdf import FPDF  # type: ignore
    except ImportError:
        out("warn", "fpdf2 not installed. Run: pip install fpdf2")
        return

    def _txt(value, maxlen: int = 200) -> str:
        """Render *value* as text the core PDF fonts can encode."""
        return _pdf_safe("" if value is None else str(value), maxlen)

    def _risk(rec) -> float:
        """Risk score of *rec* as a float (0.0 when absent)."""
        return float(_rec_get(rec, "risk_score") or 0)

    records = data.get("records", [])
    target = data.get("target", "Unknown")
    ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    summary = AdvancedReporter._build_summary(records)

    class _PDF(FPDF):
        def header(self):
            self.set_font("Helvetica", "B", 9)
            self.set_text_color(100, 100, 100)
            self.cell(0, 6, _txt(f"NOX Framework v{VERSION} | CONFIDENTIAL"), align="R")
            self.ln(4)

        def footer(self):
            self.set_y(-12)
            self.set_font("Helvetica", "", 8)
            self.set_text_color(150, 150, 150)
            self.cell(0, 6, f"Page {self.page_no()}", align="C")

    pdf = _PDF(orientation="P", unit="mm", format="A4")
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.set_margins(15, 15, 15)

    # ── Title Page ────────────────────────────────────────────────────
    pdf.add_page()
    pdf.set_fill_color(10, 10, 10)
    pdf.rect(0, 0, 210, 297, "F")

    pdf.set_y(80)
    pdf.set_font("Helvetica", "B", 32)
    pdf.set_text_color(0, 255, 65)
    pdf.cell(0, 14, "NOX FRAMEWORK REPORT", align="C")
    pdf.ln(10)

    pdf.set_font("Helvetica", "", 14)
    pdf.set_text_color(200, 200, 200)
    pdf.cell(0, 8, _txt(f"Target: {target}"), align="C")
    pdf.ln(7)
    pdf.set_font("Helvetica", "", 11)
    pdf.set_text_color(150, 150, 150)
    pdf.cell(0, 7, f"Generated: {ts}", align="C")
    pdf.ln(5)
    pdf.cell(0, 7, "FOR AUTHORISED USE ONLY", align="C")

    # ── Executive Summary ─────────────────────────────────────────────
    pdf.add_page()
    pdf.set_fill_color(255, 255, 255)
    pdf.set_text_color(0, 0, 0)

    pdf.set_font("Helvetica", "B", 16)
    pdf.cell(0, 10, "Executive Summary", ln=True)
    pdf.set_draw_color(0, 200, 50)
    pdf.set_line_width(0.5)
    pdf.line(15, pdf.get_y(), 195, pdf.get_y())
    pdf.ln(4)

    max_risk = max((_risk(r) for r in records), default=0.0)
    kpis = [
        ("Compromised Identities", summary["total_identities"]),
        ("Total Records", summary["total_records"]),
        ("Stealer Logs", summary["stealer_count"]),
        ("High-Value Targets", summary["hvt_count"]),
        ("Max Risk Score", f"{max_risk:.1f} / 100"),
    ]
    pdf.set_font("Helvetica", "B", 10)
    for label, value in kpis:
        pdf.set_fill_color(245, 245, 245)
        pdf.cell(90, 8, label, border=1, fill=True)
        pdf.set_font("Helvetica", "", 10)
        pdf.cell(85, 8, _txt(value), border=1, ln=True)
        pdf.set_font("Helvetica", "B", 10)
    pdf.ln(6)

    # Risk distribution
    pdf.set_font("Helvetica", "B", 12)
    pdf.cell(0, 8, "Risk Distribution", ln=True)
    pdf.set_font("Helvetica", "B", 9)
    for col, w in [("Level", 40), ("Count", 30), ("Bar", 105)]:
        pdf.set_fill_color(30, 30, 30)
        pdf.set_text_color(255, 255, 255)
        pdf.cell(w, 7, col, border=1, fill=True)
    pdf.ln()
    pdf.set_text_color(0, 0, 0)
    # max(..., 1) avoids dividing by zero when there are no records at all.
    total_b = max(sum(summary["buckets"].values()), 1)
    colours = {"Critical": (220, 0, 30), "High": (220, 100, 0), "Medium": (200, 180, 0),
               "Low": (0, 150, 50), "Info": (100, 100, 100)}
    for level, count in summary["buckets"].items():
        pdf.set_font("Helvetica", "", 9)
        pdf.cell(40, 6, _txt(level), border=1)
        pdf.cell(30, 6, str(count), border=1)
        bar_w = int(count / total_b * 100)
        # Remember where the (empty) bar cell starts so the bar can be drawn inside it.
        x, y = pdf.get_x(), pdf.get_y()
        pdf.cell(105, 6, "", border=1)
        if bar_w:
            r2, g2, b2 = colours.get(level, (100, 100, 100))
            pdf.set_fill_color(r2, g2, b2)
            pdf.rect(x + 1, y + 1, bar_w, 4, "F")
        pdf.ln()
    pdf.ln(4)

    # HVT list
    if summary["hvt_list"]:
        pdf.set_font("Helvetica", "B", 12)
        pdf.cell(0, 8, _txt(f"High-Value Targets ({summary['hvt_count']})"), ln=True)
        pdf.set_font("Helvetica", "", 9)
        for hvt in summary["hvt_list"][:20]:
            # ASCII marker: the warning-sign glyph is not encodable in latin-1.
            pdf.cell(0, 5, _txt(f" ! {hvt}"), ln=True)
        pdf.ln(3)

    # ── Discovered Entities Table ─────────────────────────────────────
    pdf.add_page()
    pdf.set_font("Helvetica", "B", 16)
    pdf.cell(0, 10, "Discovered Entities", ln=True)
    pdf.set_draw_color(0, 200, 50)
    pdf.line(15, pdf.get_y(), 195, pdf.get_y())
    pdf.ln(4)

    col_widths = [55, 40, 35, 25, 25]
    headers = ["Identity", "Source", "Breach", "Date", "Risk"]
    pdf.set_font("Helvetica", "B", 8)
    pdf.set_fill_color(30, 30, 30)
    pdf.set_text_color(255, 255, 255)
    for h, w in zip(headers, col_widths):
        pdf.cell(w, 7, h, border=1, fill=True)
    pdf.ln()
    pdf.set_text_color(0, 0, 0)

    for rec in records[:200]:
        risk = _risk(rec)
        row = [
            _txt(_rec_get(rec, "email") or _rec_get(rec, "username") or "-", 30),
            _txt(_rec_get(rec, "source") or "", 20),
            _txt(_rec_get(rec, "breach_name") or "", 20),
            _txt(_rec_get(rec, "breach_date") or "", 10),
            f"{risk:.1f}",
        ]
        # Tint the row by severity.
        if risk >= 90:
            pdf.set_fill_color(255, 220, 220)
        elif risk >= 70:
            pdf.set_fill_color(255, 240, 220)
        else:
            pdf.set_fill_color(255, 255, 255)
        pdf.set_font("Helvetica", "", 7)
        for val, w in zip(row, col_widths):
            pdf.cell(w, 5, val, border=1, fill=True)
        pdf.ln()

    # ── Raw Evidence ──────────────────────────────────────────────────
    pdf.add_page()
    pdf.set_font("Helvetica", "B", 16)
    pdf.set_text_color(0, 0, 0)
    # Plain hyphen: an em dash cannot be encoded by the core Helvetica font.
    pdf.cell(0, 10, "Raw Evidence - Passwords & Metadata", ln=True)
    pdf.set_draw_color(0, 200, 50)
    pdf.line(15, pdf.get_y(), 195, pdf.get_y())
    pdf.ln(4)

    pdf.set_font("Courier", "", 7)
    for rec in records[:300]:
        pw = _rec_get(rec, "password")
        meta = getattr(rec, "metadata", {}) or {}
        if not pw and not meta:
            continue
        line = _txt(_rec_get(rec, "email") or _rec_get(rec, "username") or "-", 40)
        if pw:
            line += f" pw:{_txt(pw, 40)}"
        if meta.get("author"):
            line += f" author:{_txt(meta['author'], 20)}"
        pdf.cell(0, 4, _pdf_safe(line, 120), ln=True)

    pdf.output(path)
    out("ok", f"PDF report saved: {path}")
class Config:
    """
    General settings loader from config.ini.
    Lookup order: $HOME/.nox/config.ini → /etc/nox/config.ini.

    config.ini format:
        [settings]
        concurrency = 20
        timeout = 30
        stealth = true
        rate_limit_lo = 0.5
        rate_limit_hi = 2.0
    """

    _INI_PATHS = [HOME_NOX / "config.ini", Path("/etc/nox/config.ini")]
    # Per-key lookup results. A key that is absent from the file is remembered
    # as _MISSING so the file is not re-parsed, while each caller still gets
    # its own default back.
    _cache: Dict[str, Any] = {}
    _MISSING = object()

    @classmethod
    def _ini_path(cls) -> Optional[Path]:
        """Return the first existing config.ini, or None when there is none."""
        for p in cls._INI_PATHS:
            if p.exists():
                return p
        return None

    @staticmethod
    def _cast(raw: str) -> Any:
        """Auto-cast an ini string: 'true'/'false' → bool, then int, then float."""
        lowered = raw.lower()
        if lowered in ("true", "false"):
            return lowered == "true"
        for convert in (int, float):
            try:
                return convert(raw)
            except ValueError:
                continue
        return raw

    @classmethod
    def get(cls, key: str, default: Any = None) -> Any:
        """Return the ``[settings]`` value for *key*, or *default* when unset.

        The default is never cached, so two callers asking for the same
        missing key with different defaults each receive their own.
        """
        if key in cls._cache:
            hit = cls._cache[key]
            return default if hit is cls._MISSING else hit
        value: Any = cls._MISSING
        ini = cls._ini_path()
        if ini is not None:
            cp = _configparser.ConfigParser()
            try:
                cp.read(str(ini))
                raw = cp.get("settings", key, fallback=None)
            except (_configparser.Error, UnicodeDecodeError) as exc:
                # A malformed file degrades to "no settings" instead of crashing.
                logger.warning("Config: could not read %r from %s — %s", key, ini, exc)
                raw = None
            if raw is not None:
                value = cls._cast(raw)
        cls._cache[key] = value
        return default if value is cls._MISSING else value

    @classmethod
    def apply(cls, nox_config: "NoxConfig") -> "NoxConfig":
        """Overlay config.ini values onto a NoxConfig instance.

        Values of the wrong type or below the allowed minimum are ignored
        with a warning and the existing NoxConfig value is kept.
        """
        if not cls._ini_path():
            return nox_config

        def _number(key: str, current: Any, kinds: Any, minimum: float) -> Any:
            """Return the validated numeric setting for *key*, else *current*."""
            val = cls.get(key, cls._MISSING)
            if val is cls._MISSING:
                return current
            # bool is a subclass of int, so it has to be rejected explicitly.
            if isinstance(val, bool) or not isinstance(val, kinds) or val < minimum:
                logger.warning("Config: ignoring invalid [settings] %s = %r", key, val)
                return current
            return val

        workers = _number("concurrency", nox_config.concurrency, int, 1)
        nox_config.concurrency = nox_config.max_threads = workers
        nox_config.timeout = _number("timeout", nox_config.timeout, (int, float), 0)

        stealth = cls.get("stealth", cls._MISSING)
        if isinstance(stealth, (bool, int)):
            nox_config.stealth = bool(stealth)
        elif stealth is not cls._MISSING:
            logger.warning("Config: ignoring invalid [settings] stealth = %r", stealth)

        lo = _number("rate_limit_lo", nox_config.rate_limit[0], (int, float), 0)
        hi = _number("rate_limit_hi", nox_config.rate_limit[1], (int, float), 0)
        nox_config.rate_limit = (lo, hi)
        return nox_config
+ + JSON schema: + { + "name": "MySource", + "api_url": "https://api.example.com/search?q={query}", + "request_type": "GET", + "headers": {"Authorization": "Bearer {api_key}"}, + "payload": {}, + "regex_pattern": "(\\S+@\\S+):(\\S+)", // optional; groups: email, password + "json_root": "results", // dot-path to list in JSON response + "field_map": {"email":"email","password":"password"}, + "required_api_key_name": "MY_SOURCE_API_KEY" // Vault key name + } + """ + + PROVIDERS_DIR = HOME_NOX / "providers" + + def __init__(self, semaphore: asyncio.Semaphore, db: "DB", + config: "NoxConfig", definition: dict) -> None: + super().__init__(semaphore, db, config) + self._def = definition + self.name = definition.get("name", "FSProvider") + key_name = definition.get("required_api_key_name", "") + self._api_key = Vault.get(key_name) if key_name else "" + self.needs_key = bool(key_name) + self.ok_email = self.ok_user = self.ok_domain = self.ok_phone = True + + async def async_search(self, session, query: str, qtype: str) -> List[Record]: + if self.needs_key and not self._api_key: + logger.debug("FileSystemProvider[%s]: key missing, skipping.", self.name) + return [] + try: + return await self._fetch(session, query) + except Exception as exc: + logger.debug("FileSystemProvider[%s]: %s", self.name, exc) + return [] + + async def _fetch(self, session, query: str) -> List[Record]: + d = self._def + url = (d["api_url"] + .replace("{query}", urllib.parse.quote(query, safe="")) + .replace("{api_key}", self._api_key)) + hdrs = {k: v.replace("{api_key}", self._api_key) + for k, v in d.get("headers", {}).items()} + method = d.get("request_type", "GET").upper() + payload = {k: v.replace("{query}", query).replace("{api_key}", self._api_key) + for k, v in d.get("payload", {}).items()} + + if method == "POST": + status, text, _ = await self._post(session, url, + json_data=payload or None, + headers=hdrs) + else: + status, text, _ = await self._get(session, url, headers=hdrs) + + if status 
not in range(200, 300) or not text: + return [] + + regex = d.get("regex_pattern", "") + if regex: + return self._by_regex(text, regex) + return self._by_json(text, d.get("json_root", ""), + d.get("field_map", {})) + + def _by_regex(self, text: str, pattern: str) -> List[Record]: + records = [] + for m in re.finditer(pattern, text): + groups = m.groups() + records.append(self._rec( + email = groups[0] if len(groups) > 0 else "", + password = groups[1] if len(groups) > 1 else "", + breach_name = self.name, + data_types = [self.name, "Credentials"], + )) + return records[:100] + + def _by_json(self, text: str, root: str, field_map: dict) -> List[Record]: + try: + data = json.loads(text) + except Exception: + return [] + for key in (root.split(".") if root else []): + if isinstance(data, dict): + data = data.get(key, []) + if not isinstance(data, list): + data = [data] if isinstance(data, dict) else [] + records = [] + for item in data[:100]: + if not isinstance(item, dict): + continue + records.append(self._rec( + email = str(item.get(field_map.get("email", "email"), "") or ""), + password = str(item.get(field_map.get("password", "password"), "") or ""), + username = str(item.get(field_map.get("username", "username"), "") or ""), + phone = str(item.get(field_map.get("phone", "phone"), "") or ""), + password_hash = str(item.get(field_map.get("hash", "hash"), "") or ""), + breach_name = self.name, + data_types = [self.name, "Credentials"], + raw_data = item, + )) + return records + + @classmethod + def load_all(cls, semaphore: asyncio.Semaphore, db: "DB", + config: "NoxConfig") -> List["FileSystemProvider"]: + cls.PROVIDERS_DIR.mkdir(parents=True, exist_ok=True) + providers = [] + for jf in cls.PROVIDERS_DIR.glob("*.json"): + try: + defn = json.loads(jf.read_text(encoding="utf-8")) + providers.append(cls(semaphore, db, config, defn)) + logger.info("FileSystemProvider: loaded %s", jf.name) + except Exception as exc: + logger.warning("FileSystemProvider: failed %s — %s", 
class NoxSourceProvider(FileSystemProvider):
    """
    Extended FileSystemProvider that handles the build_sources.py JSON schema:
      - Slot placeholders in headers are normally resolved by the loader and
        passed along via ``_slot_keys``; any that remain are filled in here
      - Supports input_type filtering (skip source if query type doesn't match)
      - Falls back to the first configured key among ``api_key_slots``
    """

    # Matches "{NAME}" placeholders; anything else in braces is left untouched.
    _PLACEHOLDER_RE = re.compile(r"\{([A-Za-z_][A-Za-z0-9_]*)\}")

    def __init__(self, semaphore: asyncio.Semaphore, db: "DB",
                 config: "NoxConfig", definition: dict) -> None:
        super().__init__(semaphore, db, config, definition)
        self._input_type = definition.get("input_type", "")
        self._slot_keys = definition.get("_slot_keys") or {}
        self._confidence = definition.get("confidence", 0.5)
        # For sources with api_key_slots, check if any key is configured
        slots = definition.get("api_key_slots") or []
        if slots and not self._api_key:
            for slot in slots:
                val = ConfigManager.get(slot.strip("{}"))
                if val:
                    self._api_key = val
                    break
        # Keep the requirement the base class derived from
        # required_api_key_name; slots can only add to it, never clear it.
        self.needs_key = self.needs_key or bool(slots)

    def _render(self, template: str, query_value: str) -> str:
        """Fill ``{query}``/``{target}``/``{api_key}``/slot placeholders in one pass.

        A single regex pass means substituted text is never re-scanned, so a
        query that happens to contain "{api_key}" cannot pull the key in.
        Unknown placeholders are left as they are.
        """
        mapping = {name: (val or "") for name, val in self._slot_keys.items()}
        mapping.update(query=query_value, target=query_value,
                       api_key=self._api_key or "")
        return self._PLACEHOLDER_RE.sub(
            lambda m: mapping.get(m.group(1), m.group(0)), template)

    async def async_search(self, session, query: str, qtype: str) -> List["Record"]:
        """Run the source for *query*; returns [] when filtered, keyless or failing."""
        # Filter by input_type if specified ('any' or '' means accept all qtypes)
        if self._input_type and self._input_type != "any" and qtype and self._input_type != qtype:
            return []
        if self.needs_key and not self._api_key:
            logger.debug("NoxSourceProvider[%s]: key missing, skipping.", self.name)
            return []
        try:
            return await self._fetch(session, query)
        except Exception as exc:
            logger.debug("NoxSourceProvider[%s]: %s", self.name, exc)
            return []

    async def _fetch(self, session, query: str) -> List["Record"]:
        """Issue the HTTP request described by the definition and parse the reply."""
        d = self._def
        quoted = urllib.parse.quote(query, safe="")
        # URL and headers get the percent-encoded query; header values may be
        # non-strings in hand-written JSON, hence str().
        hdrs = {k: self._render(str(v), quoted)
                for k, v in (d.get("headers") or {}).items()}
        url = self._render(d["api_url"], quoted)

        method = d.get("request_type", "GET").upper()

        def _sub(obj):
            """Recursively substitute placeholders in the payload (raw query)."""
            if isinstance(obj, str):
                return self._render(obj, query)
            if isinstance(obj, dict):
                return {k: _sub(v) for k, v in obj.items()}
            if isinstance(obj, list):
                return [_sub(v) for v in obj]
            return obj

        payload = _sub(d.get("payload") or {})

        if method == "POST":
            status, text, _ = await self._post(session, url,
                                               json_data=payload or None,
                                               headers=hdrs)
        else:
            status, text, _ = await self._get(session, url, headers=hdrs)

        if status not in range(200, 300) or not text:
            return []

        regex = d.get("regex_pattern", "")
        if regex:
            return self._by_regex(text, regex)
        return self._by_json(text, d.get("json_root", ""), d.get("field_map", {}))
+ """ + + # Spec-required path: ~/.nox/sources/ + SOURCES_DIR = SOURCE_DIR + + def __init__(self, semaphore: asyncio.Semaphore, db: "DB", + config: "NoxConfig") -> None: + self._sem = semaphore + self._db = db + self._config = config + self._nox_sources: List[AsyncSource] = [] # from ~/.nox/sources/ + self._fs_providers: List[AsyncSource] = [] # from ~/.nox/providers/ + self._py_providers: List[AsyncSource] = [] # importlib .py plugins + self._loaded = False + + def _ensure_loaded(self) -> None: + if self._loaded: + return + self._nox_sources = self._load_nox_sources() + self._fs_providers = FileSystemProvider.load_all(self._sem, self._db, self._config) + self._py_providers = self._load_py_plugins() + self._loaded = True + + total = len(self._nox_sources) + len(self._fs_providers) + len(self._py_providers) + if total == 0: + print( + f"\n {C.BD}{C.R}[FATAL] No JSON plugins found in sources/. " + f"Please run build_sources.py first.{C.X}\n" + ) + logger.critical("[FATAL] No JSON plugins found in sources/. Run build_sources.py.") + + def _load_nox_sources(self) -> List[AsyncSource]: + """ + Scan ~/.nox/sources/*.json. Handles both the build_sources.py schema + (endpoint/{target}, normalization_map, selectors, api_key_slots) and the + legacy FileSystemProvider schema (api_url/{query}, field_map, json_root). 
+ """ + self.SOURCES_DIR.mkdir(parents=True, exist_ok=True) + json_files = list(self.SOURCES_DIR.glob("*.json")) + if not json_files: + return [] + sources: List[AsyncSource] = [] + for jf in json_files: + try: + raw = json.loads(jf.read_text(encoding="utf-8")) + slots = raw.get("api_key_slots", []) + # Derive primary key name from slots (strip {}) + derived_key_name = ( + raw.get("required_api_key_name", "") + or (slots[0].strip("{}") if slots else "") + ) + # Resolve all key names from slots for header substitution + slot_keys = {s.strip("{}"): ConfigManager.get(s.strip("{}")) for s in slots} + + # Build headers: replace {KEY_NAME} placeholders with actual key values + raw_headers = raw.get("headers", {}) + resolved_headers = {} + for k, v in raw_headers.items(): + for slot_name, slot_val in slot_keys.items(): + v = v.replace(f"{{{slot_name}}}", slot_val or "") + resolved_headers[k] = v + + # Normalise endpoint: {target} → {query} for FileSystemProvider compat + endpoint = raw.get("endpoint", raw.get("api_url", "")) + endpoint = endpoint.replace("{target}", "{query}") + + # Build field_map from normalization_map (inverted: output_field → source_field) + norm_map = raw.get("normalization_map", {}) + field_map = raw.get("field_map", {}) + if norm_map and not field_map: + # normalization_map: {"email": "email_address"} means source field "email_address" → our "email" + field_map = {our_field: src_field for our_field, src_field in norm_map.items() + if our_field in ("email", "password", "username", "phone", "hash")} + + # json_root from selectors (e.g. "$.entries" → "entries") + selectors = raw.get("selectors", {}) + json_root = raw.get("json_root", "") + if not json_root and selectors: + # Take first selector value, strip "$." 
prefix + first_sel = next(iter(selectors.values()), "") + if first_sel.startswith("$."): + # Handle "$.entries" → "entries", "$.*.Name" → "" (complex path, skip) + parts = first_sel[2:].split(".") + json_root = parts[0] if len(parts) == 1 else "" + + defn = { + "name": raw.get("name", jf.stem), + "api_url": endpoint, + "request_type": raw.get("method", raw.get("request_type", "GET")), + "headers": resolved_headers, + "regex_pattern": raw.get("regex_pattern", ""), + "json_root": json_root, + "field_map": field_map, + "required_api_key_name": derived_key_name, + "api_key_slots": slots, + "input_type": raw.get("input_type", ""), + "output_type": raw.get("output_type", []), + "pivot_types": raw.get("pivot_types", []), + "confidence": raw.get("confidence", 0.5), + # payload_template → payload for POST sources + "payload": raw.get("payload_template") or raw.get("payload") or {}, + # Pass resolved slot keys so FileSystemProvider can use them + "_slot_keys": slot_keys, + } + sources.append(NoxSourceProvider(self._sem, self._db, self._config, defn)) + logger.debug("SourceOrchestrator: loaded %s", jf.name) + except Exception as exc: + logger.warning("SourceOrchestrator: failed %s — %s", jf.name, exc) + logger.info("SourceOrchestrator: loaded %d sources from sources/", len(sources)) + return sources + + def _load_py_plugins(self) -> List[AsyncSource]: + """Dynamically import plugin_*.py files via importlib.""" + plugins: List[AsyncSource] = [] + for py_file in FileSystemProvider.PROVIDERS_DIR.glob("plugin_*.py"): + try: + spec = _importlib.util.spec_from_file_location(py_file.stem, py_file) + module = _importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + if hasattr(module, "create"): + inst = module.create(self._sem, self._db, self._config) + if isinstance(inst, list): + plugins.extend(inst) + elif inst is not None: + plugins.append(inst) + logger.info("SourceOrchestrator: loaded plugin %s", py_file.name) + except Exception as exc: + 
def _pdf_safe(s: str, maxlen: int = 200) -> str:
    """
    Sanitise a value for fpdf2 core fonts (latin-1 subset).

    1. Falsy input (None, "", 0) yields "".
    2. Non-string input is converted with str() so dates / numbers taken from
       records do not crash the regex below.
    3. C0/C1 control characters are stripped (tab, LF and CR are kept).
    4. The text is truncated to ``maxlen`` characters.
    5. Characters outside latin-1 are replaced with '?' so fpdf2 never raises
       UnicodeEncodeError.
    """
    if not s:
        return ""
    if not isinstance(s, str):
        s = str(s)
    s = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x9f]", "", s)
    return s[:maxlen].encode("latin-1", errors="replace").decode("latin-1")


class ForensicReporter:
    """
    Professional forensic PDF report using fpdf2.

    Sections, in the order they are written:
      1. Case Metadata          - timestamp, investigator ID, target
      2. Executive Summary      - risk score (0-10 scale), severity breakdown
      3. Categorized Findings   - credentials, PII, dorked documents
      4. Dork Results           - URL/title, snippet, engine, dork query
      5. Scrape Results         - pastes, extracted credentials, Telegram, misconfigs
      6. Discovered Assets      - reinjected identifiers
      7. Pivot Tree             - one row per pivot-log entry
      8. Identity Relationship Map - ASCII tree of emails/phones/usernames/passwords
    """

    # ── pure helpers ────────────────────────────────────────────────────

    @staticmethod
    def _risk_gauge(risk_10: float) -> str:
        """Return '<score> ####------' with a bar that is always 10 chars wide."""
        filled = min(max(int(risk_10), 0), 10)
        return f"{risk_10} {'#' * filled}{'-' * (10 - filled)}"

    @staticmethod
    def _url_ext(url) -> str:
        """Return the file extension of the URL's path ('' when there is none)."""
        from posixpath import splitext
        from urllib.parse import urlsplit

        if not url:
            return ""
        try:
            url_path = urlsplit(str(url)).path
        except ValueError:  # e.g. malformed IPv6 literal
            return ""
        return splitext(url_path)[1].lstrip(".")

    @staticmethod
    def _pattern_summary(patterns) -> str:
        """Render {'email': [...], 'ip': [...]} as 'email(2), ip(1)'."""
        return ", ".join(f"{k}({len(v)})" for k, v in (patterns or {}).items())

    # ── drawing helpers (each mirrors one repeated call sequence) ───────

    @staticmethod
    def _heading(pdf, title: str, gap: int = 3) -> None:
        """Write a 14pt section title, underline it, and leave ``gap`` mm."""
        pdf.set_font("Helvetica", "B", 14)
        pdf.set_text_color(0, 0, 0)
        pdf.cell(0, 9, title, ln=True)
        pdf.line(15, pdf.get_y(), 195, pdf.get_y())
        pdf.ln(gap)

    @staticmethod
    def _subheading(pdf, title: str) -> None:
        """Write a 10pt bold sub-section title."""
        pdf.set_font("Helvetica", "B", 10)
        pdf.cell(0, 7, title, ln=True)

    @staticmethod
    def _table_header(pdf, cols) -> None:
        """Draw a white-on-dark header row from (name, width) pairs."""
        pdf.set_font("Helvetica", "B", 8)
        pdf.set_fill_color(30, 30, 30)
        pdf.set_text_color(255, 255, 255)
        for col_name, col_w in cols:
            pdf.cell(col_w, 6, col_name, border=1, fill=True)
        pdf.ln()
        pdf.set_text_color(0, 0, 0)

    @staticmethod
    def _table_row(pdf, values, widths, fill_rgb) -> None:
        """Draw one 7pt data row with the given background colour."""
        pdf.set_fill_color(*fill_rgb)
        pdf.set_font("Helvetica", "", 7)
        for val, w in zip(values, widths):
            pdf.cell(w, 5, val, border=1, fill=True)
        pdf.ln()

    # ── entry point ─────────────────────────────────────────────────────

    @staticmethod
    def generate(data: dict, path: str, investigator_id: str = "NOX-AUTO") -> None:
        """
        Render ``data`` as a PDF at ``path``.

        Args:
            data: scan result dict. Keys read here: "records", "target",
                "dork_results", "scrape_results", "discovered_assets",
                "pivot_log". Missing or None values are treated as empty.
            path: output file path handed to ``FPDF.output``.
            investigator_id: printed on the cover page.

        Returns:
            None. If fpdf2 is not installed a warning is emitted via ``out``
            and nothing is written.
        """
        try:
            from fpdf import FPDF  # type: ignore
        except ImportError:
            out("warn", "fpdf2 not installed. Run: pip install fpdf2")
            return
        # Local import: the file's top-level import block is not visible from here.
        from datetime import timezone

        heading = ForensicReporter._heading
        subheading = ForensicReporter._subheading
        table_header = ForensicReporter._table_header
        table_row = ForensicReporter._table_row
        pattern_summary = ForensicReporter._pattern_summary

        records = data.get("records", []) or []
        target = data.get("target", "Unknown")
        # The label says UTC, so the clock must actually be UTC (was local time).
        ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
        summary = AdvancedReporter._build_summary(records)

        # Risk score normalised to 0-10; clamped so the gauge cannot overflow.
        max_risk = max((float(_rec_get(r, "risk_score") or 0) for r in records), default=0.0)
        risk_10 = round(min(max(max_risk, 0.0), 100.0) / 10, 1)

        # Categorise findings
        credentials = [r for r in records if _rec_get(r, "password") or _rec_get(r, "password_hash")]
        pii = [r for r in records if _rec_get(r, "phone") or _rec_get(r, "name")
               or getattr(r, "address", "")]
        dorked = [r for r in records if _rec_get(r, "source") == "DorkingEngine"]

        class _PDF(FPDF):
            def header(self):
                self.set_font("Helvetica", "B", 8)
                self.set_text_color(120, 120, 120)
                self.cell(0, 5, "NOX Framework - FORENSIC REPORT - CONFIDENTIAL", align="R")
                self.ln(3)

            def footer(self):
                self.set_y(-12)
                self.set_font("Helvetica", "", 8)
                self.set_text_color(150, 150, 150)
                self.cell(0, 5, _pdf_safe(f"Page {self.page_no()} | Case: {str(target)[:40]}"), align="C")

        pdf = _PDF(orientation="P", unit="mm", format="A4")
        pdf.set_auto_page_break(auto=True, margin=15)
        pdf.set_margins(15, 15, 15)

        # ── 1. Case Metadata ─────────────────────────────────────────
        pdf.add_page()
        pdf.set_fill_color(15, 15, 15)
        pdf.rect(0, 0, 210, 297, "F")

        pdf.set_y(70)
        pdf.set_font("Helvetica", "B", 28)
        pdf.set_text_color(0, 220, 60)
        pdf.cell(0, 12, "FORENSIC INTELLIGENCE REPORT", align="C")
        pdf.ln(8)
        pdf.set_font("Helvetica", "B", 14)
        pdf.set_text_color(200, 200, 200)
        pdf.cell(0, 8, _pdf_safe(f"Target: {target}"), align="C")
        pdf.ln(6)
        pdf.set_font("Helvetica", "", 11)
        pdf.set_text_color(140, 140, 140)
        for line in [f"Timestamp: {ts}",
                     f"Investigator ID: {investigator_id}",
                     f"Framework: NOX Framework v{VERSION}",
                     "Classification: RESTRICTED - Authorised Use Only"]:
            pdf.cell(0, 6, _pdf_safe(line), align="C")
            pdf.ln(5)

        # ── 2. Executive Summary ─────────────────────────────────────
        pdf.add_page()
        pdf.set_fill_color(255, 255, 255)
        pdf.set_text_color(0, 0, 0)
        pdf.set_font("Helvetica", "B", 16)
        pdf.cell(0, 10, "Executive Summary", ln=True)
        # Draw colour / line width set here persist for every later rule and border.
        pdf.set_draw_color(0, 180, 50)
        pdf.set_line_width(0.4)
        pdf.line(15, pdf.get_y(), 195, pdf.get_y())
        pdf.ln(4)

        if risk_10 >= 8:
            risk_colour = (200, 0, 30)
        elif risk_10 >= 5:
            risk_colour = (220, 110, 0)
        else:
            risk_colour = (0, 160, 50)
        pdf.set_font("Helvetica", "B", 11)
        kpis = [
            ("Risk Score (0-10)", ForensicReporter._risk_gauge(risk_10)),
            ("Compromised Identities", str(summary["total_identities"])),
            ("Total Records", str(summary["total_records"])),
            ("Stealer Logs", str(summary["stealer_count"])),
            ("High-Value Targets", str(summary["hvt_count"])),
            ("Credential Records", str(len(credentials))),
            ("PII Records", str(len(pii))),
            ("Dorked Documents", str(len(dorked))),
        ]
        for label, value in kpis:
            pdf.set_fill_color(245, 245, 245)
            pdf.cell(90, 7, _pdf_safe(label), border=1, fill=True)
            if label.startswith("Risk"):
                pdf.set_text_color(*risk_colour)
            pdf.set_font("Helvetica", "", 10)
            pdf.cell(85, 7, _pdf_safe(value), border=1, ln=True)
            pdf.set_text_color(0, 0, 0)
            pdf.set_font("Helvetica", "B", 11)
        pdf.ln(5)

        # Severity breakdown
        pdf.set_font("Helvetica", "B", 12)
        pdf.cell(0, 8, "Severity Breakdown", ln=True)
        _sev_colours = {"Critical": (220, 0, 30), "High": (220, 100, 0),
                        "Medium": (200, 180, 0), "Low": (0, 150, 50), "Info": (100, 100, 100)}
        total_b = max(sum(summary["buckets"].values()), 1)  # avoid division by zero
        for level, count in summary["buckets"].items():
            pdf.set_font("Helvetica", "", 9)
            pdf.cell(35, 6, _pdf_safe(level), border=1)
            pdf.cell(20, 6, str(count), border=1)
            bar_w = int(count / total_b * 120)
            x, y = pdf.get_x(), pdf.get_y()
            pdf.cell(125, 6, "", border=1)
            if bar_w:
                pdf.set_fill_color(*_sev_colours.get(level, (100, 100, 100)))
                pdf.rect(x + 1, y + 1, bar_w, 4, "F")
            pdf.ln()

        # ── 3. Categorized Findings ──────────────────────────────────
        for section_title, section_records, cols in [
            ("Credentials", credentials[:150],
             [("Identity", 55), ("Password", 45), ("Source", 35), ("Risk", 20), ("Date", 25)]),
            ("PII Records", pii[:100],
             [("Identity", 55), ("Phone", 35), ("Name", 40), ("Source", 30), ("Risk", 20)]),
            ("Dorked Documents", dorked[:80],
             [("URL", 100), ("Author", 40), ("Type", 20), ("Risk", 20)]),
        ]:
            if not section_records:
                continue
            pdf.add_page()
            heading(pdf, _pdf_safe(f"Findings - {section_title}"))
            table_header(pdf, cols)
            widths = [w for _, w in cols]

            for rec in section_records:
                rs = float(_rec_get(rec, "risk_score") or 0)
                if rs >= 90:
                    fill = (255, 230, 230)
                elif rs >= 70:
                    fill = (255, 245, 230)
                else:
                    fill = (255, 255, 255)

                ident = _pdf_safe(_rec_get(rec, "email") or _rec_get(rec, "username") or "-", 35)
                src = _pdf_safe(_rec_get(rec, "source") or "", 20)
                rs_s = f"{rs:.0f}"

                if section_title == "Credentials":
                    pw = _pdf_safe(_rec_get(rec, "password") or _rec_get(rec, "password_hash") or "", 30)
                    bd = _pdf_safe(_rec_get(rec, "breach_date") or "", 10)
                    values = [ident, pw, src, rs_s, bd]
                elif section_title == "PII Records":
                    ph = _pdf_safe(_rec_get(rec, "phone") or "", 20)
                    name = _pdf_safe(_rec_get(rec, "name") or getattr(rec, "full_name", "") or "", 25)
                    values = [ident, ph, name, src, rs_s]
                else:  # Dorked
                    meta = getattr(rec, "metadata", {}) or {}
                    rd = getattr(rec, "raw_data", {}) or {}
                    raw_url = rd.get("url", "") if isinstance(rd, dict) else ""
                    url = _pdf_safe(raw_url, 65)
                    auth = _pdf_safe(meta.get("author", ""), 25)
                    # Extension comes from the full URL path, not the truncated cell text.
                    ext = _pdf_safe(ForensicReporter._url_ext(raw_url), 10)
                    values = [url, auth, ext, rs_s]
                table_row(pdf, values, widths, fill)

        # ── 4. Dork Results ──────────────────────────────────────────
        dork_results = data.get("dork_results", []) or []
        if dork_results:
            pdf.add_page()
            heading(pdf, _pdf_safe(f"Dork Results ({len(dork_results)} hits)"))
            table_header(pdf, [("URL / Title", 90), ("Snippet", 55), ("Engine", 20), ("Dork Query", 15)])
            for h in dork_results[:200]:
                table_row(pdf, [
                    # Fall back to the title when the URL is missing *or* empty.
                    _pdf_safe(h.get("url") or h.get("title", ""), 60),
                    _pdf_safe(h.get("snippet", ""), 38),
                    _pdf_safe(h.get("engine", ""), 12),
                    _pdf_safe(h.get("dork", ""), 12),
                ], [90, 55, 20, 15], (245, 245, 255))

        # ── 5. Scrape Results ────────────────────────────────────────
        scrape_results = data.get("scrape_results", {}) or {}
        pastes = scrape_results.get("pastes", [])
        creds_sc = scrape_results.get("credentials", [])
        tg_hits = scrape_results.get("telegram", [])
        mc_hits = scrape_results.get("dork_misconfigs", [])

        if pastes or creds_sc or tg_hits or mc_hits:
            pdf.add_page()
            heading(pdf, "Scrape Results")

            paste_links = {
                "Pastebin": "https://pastebin.com/{}",
                "Rentry": "https://rentry.co/{}",
                "Hastebin": "https://hastebin.com/{}",
                "DPaste": "https://dpaste.org/{}",
                "Ghostbin": "https://ghostbin.com/paste/{}",
                "JustPaste": "https://justpaste.it/{}",
                "ControlC": "https://controlc.com/{}",
                "Paste2": "https://paste2.org/raw/{}",
                "PastebinPro": "https://pastebin.com/{}",
            }

            if pastes:
                subheading(pdf, _pdf_safe(f"Pastes ({len(pastes)})"))
                table_header(pdf, [("Site", 25), ("Paste ID / Link", 80), ("Patterns Found", 75)])
                for p in pastes[:100]:
                    pid = p.get("id", "")
                    tmpl = paste_links.get(p.get("site", ""), "")
                    table_row(pdf, [
                        _pdf_safe(p.get("site", ""), 15),
                        _pdf_safe(tmpl.format(pid) if tmpl and pid else pid, 55),
                        _pdf_safe(pattern_summary(p.get("patterns")), 50),
                    ], [25, 80, 75], (245, 245, 245))
                pdf.ln(3)

            if creds_sc:
                subheading(pdf, _pdf_safe(f"Extracted Credentials ({len(creds_sc)})"))
                table_header(pdf, [("Raw Credential", 120), ("Source", 30), ("Paste ID", 30)])
                for c in creds_sc[:150]:
                    table_row(pdf, [
                        _pdf_safe(c.get("raw", ""), 80),
                        _pdf_safe(c.get("source", ""), 20),
                        _pdf_safe(c.get("paste_id", ""), 20),
                    ], [120, 30, 30], (255, 240, 240))
                pdf.ln(3)

            if tg_hits:
                subheading(pdf, _pdf_safe(f"Telegram CTI ({len(tg_hits)})"))
                table_header(pdf, [("Channel / Link", 50), ("Message Excerpt", 100), ("Patterns", 30)])
                for t in tg_hits[:80]:
                    table_row(pdf, [
                        _pdf_safe(f"t.me/s/{t.get('channel','')}", 35),
                        _pdf_safe(t.get("text", ""), 70),
                        _pdf_safe(pattern_summary(t.get("patterns")), 25),
                    ], [50, 100, 30], (245, 245, 255))
                pdf.ln(3)

            if mc_hits:
                subheading(pdf, _pdf_safe(f"Misconfigurations ({len(mc_hits)})"))
                table_header(pdf, [("URL", 90), ("Title", 60), ("Dork", 30)])
                for m in mc_hits[:80]:
                    table_row(pdf, [
                        _pdf_safe(m.get("url", ""), 60),
                        _pdf_safe(m.get("title", ""), 40),
                        _pdf_safe(m.get("dork", ""), 25),
                    ], [90, 60, 30], (255, 245, 230))

        # ── 6. Discovered Assets ─────────────────────────────────────
        discovered_assets = data.get("discovered_assets", []) or []
        if discovered_assets:
            pdf.add_page()
            heading(pdf, _pdf_safe(f"Discovered Assets ({len(discovered_assets)} reinjected identifiers)"))
            table_header(pdf, [("Asset", 65), ("Type", 20), ("Phase", 20),
                               ("Reference (Source/URL/Paste)", 55), ("From", 20)])
            _phase_fills = {"breach": (255, 230, 230), "dork": (255, 245, 220),
                            "scrape": (245, 230, 255), "hash_crack": (245, 230, 255)}
            for da in discovered_assets[:300]:
                phase = da.get("phase", "?")
                table_row(pdf, [
                    _pdf_safe(da.get("asset", ""), 45),
                    _pdf_safe(da.get("qtype", ""), 12),
                    _pdf_safe(phase, 12),
                    _pdf_safe(da.get("ref", ""), 38),
                    _pdf_safe(da.get("parent", ""), 14),
                ], [65, 20, 20, 55, 20], _phase_fills.get(phase, (245, 245, 245)))

        # ── 7. Pivot Tree ────────────────────────────────────────────
        pivot_log = data.get("pivot_log", []) or []
        if pivot_log:
            pdf.add_page()
            heading(pdf, _pdf_safe(f"Pivot Tree ({len(pivot_log)} nodes)"))
            table_header(pdf, [("D", 8), ("Asset", 55), ("Type", 18), ("Phase", 18), ("Parent", 40),
                               ("Breach", 12), ("Dorks", 12), ("Scrape", 12), ("Cracked", 5)])
            for e in pivot_log[:300]:
                # "cracked" may be absent or None; entries are stringified before joining.
                cracked_str = _pdf_safe(", ".join(str(c) for c in (e.get("cracked") or [])[:2]), 10)
                table_row(pdf, [
                    str(e.get("depth", 0)),
                    _pdf_safe(e.get("asset", ""), 38),
                    _pdf_safe(e.get("qtype", ""), 12),
                    _pdf_safe(e.get("found_in", ""), 12),
                    _pdf_safe(e.get("parent") or "", 28),
                    str(e.get("records", 0)),
                    str(e.get("dorks", 0)),
                    str(e.get("scrape", 0)),
                    cracked_str,
                ], [8, 55, 18, 18, 40, 12, 12, 12, 5], (245, 245, 245))

        # ── 8. Identity Relationship Map ─────────────────────────────
        pdf.add_page()
        heading(pdf, "Identity Relationship Map", gap=4)

        emails = sorted({_rec_get(r, "email") for r in records if _rec_get(r, "email")})[:8]
        phones = sorted({_rec_get(r, "phone") for r in records if _rec_get(r, "phone")})[:6]
        usernames = sorted({_rec_get(r, "username") for r in records if _rec_get(r, "username")})[:6]
        passwords = sorted({_rec_get(r, "password") for r in records if _rec_get(r, "password")})[:5]

        pdf.set_font("Courier", "", 8)
        pdf.set_fill_color(245, 255, 245)
        pdf.rect(15, pdf.get_y(), 180, 120, "F")
        pdf.set_xy(18, pdf.get_y() + 3)

        graph_lines = [_pdf_safe(f"[*] TARGET: {target}")]
        for items, label in [
            (emails, "email"),
            (phones, "phone"),
            (usernames, "username"),
            (passwords, "password"),
        ]:
            if not items:
                continue
            graph_lines.append(f" +-- [{label}]")
            last = len(items) - 1
            for i, v in enumerate(items):
                pfx = " | \\--" if i == last else " | +--"
                graph_lines.append(_pdf_safe(f"{pfx} {v}", 80))

        # Only the first 30 lines are drawn (4 mm each, against the 120 mm box above).
        for line in graph_lines[:30]:
            pdf.cell(0, 4, line, ln=True)
            pdf.set_x(18)

        pdf.output(path)
        out("ok", f"Forensic PDF saved: {path}")
-t example.com Scan domain + {p} -t example.com --fullscan Full assault + pivot + {p} --dork user@email.com Google dorking + {p} --scrape user@email.com Web scraping + Telegram + {p} --crack Crack a hash + {p} --analyze "P@ssw0rd" Password analysis + {p} --list-sources List loaded plugins with key status +""" + )(_prog)) + parser.add_argument("-t","--target", help="Target to scan") + parser.add_argument("-i","--interactive", action="store_true", help="Interactive mode") + parser.add_argument("--version", action="version", version=f"%(prog)s {VERSION}") + parser.add_argument("--autoscan", action="store_true", help="Full autoscan: scan+pivot+dork+scrape (no args needed, uses -t)") + parser.add_argument("--fullscan", action="store_true", help="Full scan+pivot (alias for --autoscan without dork/scrape)") + parser.add_argument("--no-pivot", action="store_true", help="Disable recursive pivot enrichment") + parser.add_argument("--depth", type=int, default=None, metavar="N", help="Avalanche pivot depth (default: 2)") + parser.add_argument("--dork", metavar="TARGET", help="Google dorking") + parser.add_argument("--scrape", metavar="TARGET", help="Web scraping + Telegram indexing") + parser.add_argument("--crack", metavar="HASH", help="Crack a hash (WARNING: submits hash to public rainbow-table APIs — use --no-online-crack to disable)") + parser.add_argument("--no-online-crack", action="store_true", + help="Disable online rainbow-table APIs for hash cracking (local wordlist only, no data sent to third parties)") + parser.add_argument("--analyze", metavar="PASS", help="Analyze password") + parser.add_argument("--list-sources", action="store_true", help="List loaded plugins with input_type, confidence, key status") + parser.add_argument("--tor", action="store_true", help="Enable Tor") + parser.add_argument("--proxy", metavar="URL", help="HTTP/S or SOCKS5 proxy URL") + parser.add_argument("--allow-leak", action="store_true", + help="Bypass fail-safe: allow direct connection if 
proxy/Tor is unavailable (OPSEC risk)") + parser.add_argument("--guardian-off", action="store_true", + help="Alias for --allow-leak: disable Guardian OPSEC kill-switch (direct connection)") + parser.add_argument("--reset-sources", action="store_true", + help="Force resync of all source plugins from package (overwrites user modifications)") + parser.add_argument("--threads", type=int, default=20, help="Max concurrency") + parser.add_argument("--timeout", type=int, default=15, help="Request timeout") + parser.add_argument("-o","--output", metavar="FILE", help="Output file") + parser.add_argument("--format", choices=["json","csv","html","md","pdf"], default="json", help="Output format") + parser.add_argument("--diff", action="store_true", + help="Compare current scan against the last cached scan and highlight new findings only") + + args = parser.parse_args() + config = NoxConfig() + # Apply ~/.nox/config.ini settings before CLI args (CLI takes precedence) + Config.apply(config) + if args.tor: + config.use_tor = True + config.proxy = f"socks5h://127.0.0.1:{config.tor_socks}" + if args.proxy: + config.proxy = args.proxy + # K2: --guardian-off is an alias for --allow-leak + config.allow_leak = args.allow_leak or getattr(args, "guardian_off", False) + config.no_online_crack = getattr(args, "no_online_crack", False) + config.max_threads = config.concurrency = args.threads + config.timeout = args.timeout + # A9/I3: store no_pivot and depth in config so REPL and AvalancheScanner can read them + config.no_pivot = args.no_pivot + if getattr(args, "depth", None) is not None: + config.pivot_depth = args.depth + + db = NoxDB() + try: + _main_run(args, config, db) + finally: + db.close() + + +def _main_run(args, config: NoxConfig, db: NoxDB) -> None: + orc = Orchestrator(config, db) + + # --list-sources + if getattr(args, "list_sources", False): + repl = REPL.__new__(REPL) + repl.orc = orc + repl.db = db + repl.config = config + repl._sources() + return + + # B6: --reset-sources 
forces a full resync from package + if getattr(args, "reset_sources", False): + import shutil as _shutil + candidate = _PKG_ROOT / "sources" + if not candidate.is_dir(): + candidate = Path("/usr/share/nox-cli/sources") + if candidate.is_dir(): + count = 0 + for jf in candidate.glob("*.json"): + dst = SOURCE_DIR / jf.name + try: + _shutil.copy2(jf, dst) + count += 1 + except OSError: + pass + out("ok", f"Reset {count} source plugins from package.") + else: + out("warn", "Package sources directory not found.") + return + + if args.crack: + if getattr(config, "no_online_crack", False): + out("warn", "Online rainbow-table APIs disabled (--no-online-crack). Local wordlist only.") + result = orc.crack(args.crack) + out("info", f"Types: {', '.join(t[0] for t in result.get('types',[]))}") + if result.get("plaintext"): out("ok", f"CRACKED: {result['plaintext']} (via {result['method']})") + else: out("warn", "Could not crack.") + return + + if args.analyze: + repl = REPL.__new__(REPL) + repl.orc = orc + repl._analyze(args.analyze) + return + + if args.dork: + results = orc.dork(args.dork) + out("ok", f"Dorking: {len(results)} results") + for i, r in enumerate(results[:20], 1): + title = (r.get('title','') or r.get('dork',''))[:70] + url = r.get("url", "") + snippet = r.get("snippet", "")[:100] + dork_q = r.get("dork", "")[:60] + engine = r.get("engine", "") + eng_tag = f" {C.DM}[{engine}]{C.X}" if engine else "" + print(f" {C.Y}{i:2}.{C.W} {title}{eng_tag}") + if url: print(f" {C.DM}{url[:80]}{C.X}") + if snippet: print(f" {C.DM}{snippet}{C.X}") + if dork_q and dork_q != title: print(f" {C.DM}dork: {dork_q}{C.X}") + if len(results) > 20: + print(f" {C.DM} … and {len(results)-20} more — use -o for full export{C.X}") + if args.output: + data = {"target": args.dork, "records": [], "dork_results": results, "scrape_results": {}} + if args.format == "json": Reporter.to_json(data, args.output) + elif args.format == "html": Reporter.to_html(data, args.output) + elif args.format == 
"md": Reporter.to_markdown(data, args.output) + elif args.format == "pdf": Reporter.to_pdf(data, args.output) + elif args.format == "csv": + resolved = Reporter._resolve_path(args.output, "csv") + import csv as _csv + with open(resolved, "w", newline="", encoding="utf-8") as f: + w = _csv.DictWriter(f, fieldnames=["url","title","snippet","dork","engine"], extrasaction="ignore") + w.writeheader(); w.writerows(results) + out("ok", f"Dork CSV saved: {resolved}") + return + + if args.scrape: + results = orc.scrape(args.scrape) + pastes = results.get('pastes',[]); creds = results.get('credentials',[]) + tg = results.get('telegram',[]); mc = results.get('dork_misconfigs',[]) + out("ok", f"Pastes: {len(pastes)} | Credentials: {len(creds)} | " + f"Hashes: {len(results.get('hashes',[]))} | Telegram: {len(tg)} | Misconfigs: {len(mc)}") + _ptmpl = {"Pastebin":"https://pastebin.com/{}","Rentry":"https://rentry.co/{}", + "Hastebin":"https://hastebin.com/{}","DPaste":"https://dpaste.org/{}"} + for p in pastes[:8]: + pid = p.get("id",""); site = p.get("site","") + url = _ptmpl.get(site,"").format(pid) if _ptmpl.get(site) and pid else "" + pats = ", ".join(f"{k}({len(v)})" for k,v in (p.get("patterns") or {}).items()) + print(f" {C.P}[paste]{C.W} [{site}] {(p.get('title') or pid)[:50]} {C.DM}{pats}{C.X}") + if url: print(f" {C.DM}{url}{C.X}") + if len(pastes) > 8: print(f" {C.DM} … and {len(pastes)-8} more pastes{C.X}") + for c in creds[:12]: + src = c.get("source",""); pid = c.get("paste_id","") + ref = f"[{src or pid}]" if (src or pid) else "" + print(f" {C.R}[cred]{C.W} {c.get('raw','')[:80]} {C.DM}{ref}{C.X}") + if len(creds) > 12: print(f" {C.DM} … and {len(creds)-12} more credentials{C.X}") + for t in tg[:5]: + pats = ", ".join(f"{k}({len(v)})" for k,v in (t.get("patterns") or {}).items()) + print(f" {C.CY}[tg]{C.W} [{t.get('channel','')}] {t.get('text','')[:70]} {C.DM}{pats}{C.X}") + if len(tg) > 5: print(f" {C.DM} … and {len(tg)-5} more telegram hits{C.X}") + for m in 
mc[:5]: + print(f" {C.O}[misc]{C.W} {m.get('title','')[:60]}") + if m.get("url"): print(f" {C.DM}{m['url'][:80]}{C.X}") + if m.get("dork"): print(f" {C.DM}dork: {m['dork'][:60]}{C.X}") + if len(mc) > 5: print(f" {C.DM} … and {len(mc)-5} more misconfigs{C.X}") + if args.output: + data = {"target": args.scrape, "records": [], "dork_results": [], "scrape_results": results} + if args.format == "json": Reporter.to_json(data, args.output) + elif args.format == "html": Reporter.to_html(data, args.output) + elif args.format == "md": Reporter.to_markdown(data, args.output) + elif args.format == "pdf": Reporter.to_pdf(data, args.output) + elif args.format == "csv": + REPL._export_csv_extras(data, Reporter._resolve_path(args.output, "csv")) + return + + if args.target: + if args.autoscan or args.fullscan: + try: + result = asyncio.run(orc.fullscan(args.target, pivot=not args.no_pivot)) + except KeyboardInterrupt: + print() + out("warn", "Scan interrupted.") + sys.exit(0) + records = result.get("records",[]) + else: + records = orc.scan(args.target) + HVTAnalyzer.annotate(records) + result = { + "target": args.target, + "records": records, + "analysis": CredAnalyzer.analyze(records), + "hvt_records": HVTAnalyzer.filter_hvt(records), + "dork_results": [], + "scrape_results": {}, + "pivot_chain": [args.target], + "pivot_log": [], + "discovered_assets": [], + "scan_meta": {"pivot_depth": 0, "nodes_discovered": len(records)}, + } + analysis = result.get("analysis") or CredAnalyzer.analyze(records) + + # ── --diff: surface only new findings vs last cached scan ── + if getattr(args, "diff", False): + try: + prev_rows = db.get_creds(args.target) + prev_keys = { + hashlib.sha256( + f"{r.get('email','') or r.get('username','')}:{r.get('password','')}".encode() + ).hexdigest() + for r in prev_rows + } + new_records = [ + r for r in records + if hashlib.sha256( + f"{r.email or r.username}:{r.password}".encode() + ).hexdigest() not in prev_keys + ] + out("info", f"--diff: 
{len(new_records)} new findings vs last cached scan ({len(records) - len(new_records)} already known)") + records = new_records + result["records"] = new_records + except Exception as _de: + out("warn", f"--diff failed, showing full results: {_de}") + repl = REPL.__new__(REPL) + repl.orc = orc + repl.db = db + repl.config = config + repl._last_full = result + repl._last = records + repl._print_summary(analysis) + if args.autoscan or args.fullscan: + dorks = result.get("dork_results",[]) + if dorks: + out("info", f"Dorking Results: {len(dorks)}") + for d in dorks[:10]: + title = (d.get('title','') or d.get('dork',''))[:70] + print(f" {C.Y}→{C.W} {title}") + if d.get("url"): print(f" {C.DM}{d['url'][:80]}{C.X}") + if len(dorks) > 10: + print(f" {C.DM} … and {len(dorks)-10} more — use -o for full export{C.X}") + scrape = result.get("scrape_results",{}) + creds = scrape.get("credentials",[]) + if creds: + out("info", f"Scraped Credentials: {len(creds)}") + for c in creds[:10]: + print(f" {C.R}→{C.W} {c.get('raw','')}") + if len(creds) > 10: + print(f" {C.DM} … and {len(creds)-10} more{C.X}") + tg = scrape.get("telegram",[]) + if tg: + out("info", f"Telegram Hits: {len(tg)}") + for t in tg[:5]: + print(f" {C.CY}→{C.W} [{t.get('channel','')}] {t.get('text','')[:80]}") + if len(tg) > 5: + print(f" {C.DM} … and {len(tg)-5} more{C.X}") + mc = scrape.get("dork_misconfigs",[]) + if mc: + out("info", f"Misconfigurations: {len(mc)}") + for m in mc[:5]: + print(f" {C.O}→{C.W} {m.get('title','')[:70]}") + if len(mc) > 5: + print(f" {C.DM} … and {len(mc)-5} more{C.X}") + da = result.get("discovered_assets", []) + if da: + out("info", f"Reinjected Assets: {len(da)}") + _pc = {"breach": C.R, "dork": C.O, "scrape": C.P, "hash_crack": C.P} + for d in da[:15]: + pc = _pc.get(d.get("phase",""), C.DM) + print(f" {pc}[{d.get('phase','?')}]{C.W} {d.get('asset','')} " + f"{C.DM}({d.get('qtype','')}) ← {d.get('ref','')[:60]}{C.X}") + if len(da) > 15: + print(f" {C.DM} … and {len(da)-15} more 
#!/bin/sh
# Debian postinst for NOX: creates an isolated virtualenv under /opt/nox-cli,
# installs the Python dependencies into it, builds the source plugins and
# links the wrapper script to /usr/bin/nox-cli.
set -e

INSTALL_DIR="/opt/nox-cli"
VENV="$INSTALL_DIR/.venv"
WRAPPER="$INSTALL_DIR/nox-wrapper.sh"
BIN="/usr/bin/nox-cli"

# Read the version from build_deb.sh. The pipeline's exit status is cut's,
# which is 0 even when grep finds nothing, so a trailing `|| echo` never
# fires; apply the default through parameter expansion instead.
NOX_VERSION=$(grep '^VERSION=' "$INSTALL_DIR/build_deb.sh" 2>/dev/null | cut -d'"' -f2)
NOX_VERSION=${NOX_VERSION:-1.0.0}

case "$1" in
    configure)
        echo "[*] NOX Framework: Setting up isolated virtual environment..."

        # 1. Create venv if absent
        if [ ! -f "$VENV/bin/python" ]; then
            python3 -m venv "$VENV"
            echo "[+] Virtual environment created at $VENV"
        else
            echo "[*] Virtual environment already exists — skipping creation."
        fi

        # 2. Upgrade pip inside venv
        "$VENV/bin/pip" install --quiet --upgrade pip

        # 3. Install dependencies strictly inside venv
        "$VENV/bin/pip" install --quiet -r "$INSTALL_DIR/requirements.txt"
        echo "[+] Dependencies installed."

        # 4. Build source plugins (best effort: a failure must not abort the
        #    package configuration, but it should not be reported as success)
        if "$VENV/bin/python" "$INSTALL_DIR/build_sources.py" > /dev/null 2>&1; then
            echo "[+] Source plugins built."
        else
            echo "[!] Source plugin build failed (non-fatal)." >&2
        fi
        # The glob expands to regular files, so no recursion is needed.
        chmod 644 "$INSTALL_DIR/sources/"*.json 2>/dev/null || true

        # 5. Link wrapper to /usr/bin/nox-cli
        chmod +x "$WRAPPER"
        ln -sf "$WRAPPER" "$BIN"
        echo "[+] Executable linked: $BIN"

        echo "[+] NOX v${NOX_VERSION} installed. Run: nox-cli --help"
        ;;

    abort-upgrade|abort-remove|abort-deconfigure)
        ;;

    *)
        echo "postinst called with unknown argument: $1" >&2
        exit 1
        ;;
esac

exit 0
──────────────────────────────────────────── +requests>=2.31.0 +certifi>=2024.2.2 # up-to-date CA bundle for SSL verification +cloudscraper>=1.2.71 # Cloudflare-protected endpoint bypass +beautifulsoup4>=4.12.3 +lxml>=5.1.0 # fast BS4 parser; required for HTML/XML scraping +dnspython>=2.6.0 # DNS resolution (MX, A, TXT lookups) +phonenumbers>=8.13.0 # phone number parsing and validation + +# ── Validation ───────────────────────────────────────────────────────── +pydantic>=2.0.0 # source schema validation and build engine +pydantic-core>=2.0.0 # Rust-backed core for Pydantic v2 + +# ── CLI / UI ─────────────────────────────────────────────────────────── +colorama>=0.4.6 +rich>=13.7.0 + +# ── Tor Circuit Control ──────────────────────────────────────────────── +# Requires the system `tor` package (sudo apt install tor on Kali). +# Used by --tor flag and the `tor` REPL command. +stem>=1.8.2 + +# ── Reporting ────────────────────────────────────────────────────────── +# Required for `--format pdf` and `export --format pdf`. 
"""Setuptools build script for the ``nox-cli`` distribution.

Reads the dependency list from ``requirements.txt`` and the long description
from ``README.md``, then registers the ``nox`` module, the ``nox-cli`` console
script, the JSON source definitions and the man page.

All paths are relative, so the script is expected to be run from the project
root (the directory containing this file).
"""
import re
from pathlib import Path

# pip's requirements format starts a comment at "#" only when the "#" is at
# the beginning of the line or preceded by whitespace; a "#" glued to a URL
# (e.g. "...whl#sha256=...") is part of the requirement and must be kept.
_COMMENT_RE = re.compile(r"(^|\s+)#.*$")


def parse_requirements(text):
    """Return the requirement specifiers found in requirements-file *text*.

    Blank lines, full-line comments (indented or not) and trailing inline
    comments are removed. Option lines starting with ``-`` (such as
    ``-r other.txt`` or ``--index-url ...``) are skipped because they are
    pip options, not requirement specifiers usable in ``install_requires``.

    >>> parse_requirements("a>=1   # why\\n# note\\n\\nb\\n")
    ['a>=1', 'b']
    """
    specs = []
    for raw_line in text.splitlines():
        spec = _COMMENT_RE.sub("", raw_line).strip()
        if spec and not spec.startswith("-"):
            specs.append(spec)
    return specs


def main():
    """Collect metadata from the project files and call ``setuptools.setup``."""
    # Imported here so parse_requirements() stays importable (and testable)
    # on interpreters where setuptools is not installed.
    from setuptools import setup

    # Decode explicitly as UTF-8: both files contain non-ASCII characters and
    # the locale default encoding is not UTF-8 on every platform.
    requirements = parse_requirements(
        Path("requirements.txt").read_text(encoding="utf-8")
    )
    long_description = Path("README.md").read_text(encoding="utf-8")

    # Sorted so the generated file list does not depend on directory order.
    source_files = sorted(str(p) for p in Path("sources").glob("*.json"))

    setup(
        name="nox-cli",
        version="1.0.0",
        author="nox-project",
        description="Advanced Asynchronous Cyber Threat Intelligence Framework",
        long_description=long_description,
        long_description_content_type="text/markdown",
        license="Apache-2.0",
        python_requires=">=3.8",
        py_modules=["nox"],
        install_requires=requirements,
        entry_points={
            "console_scripts": [
                "nox-cli=nox:main",
            ],
        },
        data_files=[
            ("share/nox-cli/sources", source_files),
            ("share/man/man1", ["docs/nox-cli.1"]),
        ],
    )


# pip, build front-ends and `python setup.py ...` all execute this file with
# __name__ set to "__main__", so packaging behaviour is unchanged by the guard.
if __name__ == "__main__":
    main()
"https://api.abuseipdb.com/api/v2/check?ipAddress={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "score": "$.data.abuseConfidenceScore" + }, + "rate_limit": 1.0, + "headers": { + "Key": "{ABUSEIPDB_API_KEY}" + }, + "api_key_slots": [ + "{ABUSEIPDB_API_KEY}" + ], + "input_type": "ip", + "output_type": [ + "ip" + ], + "normalization_map": { + "abuseConfidenceScore": "abuse_score" + }, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://api.abuseipdb.com", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/alienvault_otx_domain.json b/sources/alienvault_otx_domain.json new file mode 100644 index 0000000..a4195c8 --- /dev/null +++ b/sources/alienvault_otx_domain.json @@ -0,0 +1,29 @@ +{ + "name": "alienvault_otx_domain", + "category": "threat_intel", + "endpoint": "https://otx.alienvault.com/api/v1/indicators/domain/{target}/general", + "method": "GET", + "requires_auth": false, + "selectors": { + "pulses": "$.pulse_info.count", + "tags": "$.tags" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "domain", + "output_type": [ + "domain", + "ip" + ], + "normalization_map": {}, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://otx.alienvault.com", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/alienvault_otx_ip.json b/sources/alienvault_otx_ip.json new file mode 100644 index 0000000..c74b0a2 --- /dev/null +++ b/sources/alienvault_otx_ip.json @@ -0,0 +1,28 @@ +{ + "name": "alienvault_otx_ip", + "category": "threat_intel", + "endpoint": "https://otx.alienvault.com/api/v1/indicators/IPv4/{target}/general", + "method": "GET", + "requires_auth": false, + "selectors": { + "asn": "$.asn", + "country": "$.country_name" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": 
[], + "input_type": "ip", + "output_type": [ + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://otx.alienvault.com", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/alienvault_otx_malware.json b/sources/alienvault_otx_malware.json new file mode 100644 index 0000000..dd49715 --- /dev/null +++ b/sources/alienvault_otx_malware.json @@ -0,0 +1,27 @@ +{ + "name": "alienvault_otx_malware", + "category": "threat_intel", + "endpoint": "https://otx.alienvault.com/api/v1/indicators/file/{target}/analysis", + "method": "GET", + "requires_auth": false, + "selectors": { + "malware": "$.analysis.malware" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "hash", + "output_type": [ + "hash" + ], + "normalization_map": {}, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://otx.alienvault.com", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/alienvault_otx_user.json b/sources/alienvault_otx_user.json new file mode 100644 index 0000000..4097fcf --- /dev/null +++ b/sources/alienvault_otx_user.json @@ -0,0 +1,26 @@ +{ + "name": "alienvault_otx_user", + "category": "social", + "endpoint": "https://otx.alienvault.com/api/v1/users/{target}/general", + "method": "GET", + "requires_auth": false, + "selectors": { + "pulses": "$.pulse_count" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "username", + "output_type": [ + "username" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://otx.alienvault.com", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/anubis_subdomains.json b/sources/anubis_subdomains.json 
new file mode 100644 index 0000000..1e6a00c --- /dev/null +++ b/sources/anubis_subdomains.json @@ -0,0 +1,27 @@ +{ + "name": "anubis_subdomains", + "category": "dns_recon", + "endpoint": "https://jldc.me/anubis/subdomains/{target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "subdomains": "$.*" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "domain", + "output_type": [ + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://jldc.me", + "expected_status": 200, + "reliability_score": 3, + "is_volatile": true, + "backup_endpoints": [], + "confidence": 0.7 +} \ No newline at end of file diff --git a/sources/anyrun.json b/sources/anyrun.json new file mode 100644 index 0000000..d271d6f --- /dev/null +++ b/sources/anyrun.json @@ -0,0 +1,32 @@ +{ + "name": "anyrun", + "category": "threat_intel", + "endpoint": "https://api.any.run/v1/analysis?hash={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "tasks": "$.tasks" + }, + "rate_limit": 1.0, + "headers": { + "Authorization": "API-Key {ANYRUN_API_KEY}" + }, + "api_key_slots": [ + "{ANYRUN_API_KEY}" + ], + "input_type": "hash", + "output_type": [ + "hash" + ], + "normalization_map": {}, + "tags": [ + "passive", + "threat", + "heavy" + ], + "health_check_url": "https://api.any.run", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/bgpview_ip.json b/sources/bgpview_ip.json new file mode 100644 index 0000000..1ef9fe1 --- /dev/null +++ b/sources/bgpview_ip.json @@ -0,0 +1,27 @@ +{ + "name": "bgpview_ip", + "category": "network", + "endpoint": "https://api.bgpview.io/ip/{target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "prefixes": "$.data.prefixes[*].prefix" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "ip", + "output_type": [ + "ip" + ], + 
"normalization_map": {}, + "tags": [ + "passive", + "infrastructure" + ], + "health_check_url": "https://api.bgpview.io", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/binaryedge_dns.json b/sources/binaryedge_dns.json new file mode 100644 index 0000000..be7e551 --- /dev/null +++ b/sources/binaryedge_dns.json @@ -0,0 +1,30 @@ +{ + "name": "binaryedge_dns", + "category": "dns_recon", + "endpoint": "https://api.binaryedge.io/v2/query/domains/subdomain/{target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "subs": "$.subs" + }, + "rate_limit": 1.0, + "headers": { + "X-Key": "{BINARYEDGE_API_KEY}" + }, + "api_key_slots": [ + "{BINARYEDGE_API_KEY}" + ], + "input_type": "domain", + "output_type": [ + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://api.binaryedge.io", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/binaryedge_exposed.json b/sources/binaryedge_exposed.json new file mode 100644 index 0000000..22422a2 --- /dev/null +++ b/sources/binaryedge_exposed.json @@ -0,0 +1,33 @@ +{ + "name": "binaryedge_exposed", + "category": "scanners", + "endpoint": "https://api.binaryedge.io/v2/query/ip/{target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "ports": "$.events[*].port" + }, + "rate_limit": 1.0, + "headers": { + "X-Key": "{BINARYEDGE_API_KEY}" + }, + "api_key_slots": [ + "{BINARYEDGE_API_KEY}" + ], + "input_type": "ip", + "output_type": [ + "ip" + ], + "normalization_map": { + "port": "open_port" + }, + "tags": [ + "passive", + "infrastructure" + ], + "health_check_url": "https://api.binaryedge.io", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/bing_search_api.json 
b/sources/bing_search_api.json new file mode 100644 index 0000000..b9aa327 --- /dev/null +++ b/sources/bing_search_api.json @@ -0,0 +1,30 @@ +{ + "name": "bing_search_api", + "category": "search", + "endpoint": "https://api.bing.microsoft.com/v7.0/search?q={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "urls": "$.webPages.value[*].url" + }, + "rate_limit": 1.0, + "headers": { + "Ocp-Apim-Subscription-Key": "{BING_API_KEY}" + }, + "api_key_slots": [ + "{BING_API_KEY}" + ], + "input_type": "any", + "output_type": [ + "url" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://api.bing.microsoft.com", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/breachaware.json b/sources/breachaware.json new file mode 100644 index 0000000..3f2096c --- /dev/null +++ b/sources/breachaware.json @@ -0,0 +1,32 @@ +{ + "name": "breachaware", + "category": "breaches", + "endpoint": "https://api.breachaware.com/v1/search?query={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "breaches": "$.breaches" + }, + "rate_limit": 1.0, + "headers": { + "X-API-KEY": "{BA_API_KEY}" + }, + "api_key_slots": [ + "{BA_API_KEY}" + ], + "input_type": "email", + "output_type": [ + "email" + ], + "normalization_map": {}, + "tags": [ + "passive", + "stealth" + ], + "health_check_url": "https://api.breachaware.com", + "expected_status": 200, + "reliability_score": 3, + "is_volatile": true, + "backup_endpoints": [], + "confidence": 0.7 +} \ No newline at end of file diff --git a/sources/breachdirectory.json b/sources/breachdirectory.json new file mode 100644 index 0000000..ad7891e --- /dev/null +++ b/sources/breachdirectory.json @@ -0,0 +1,29 @@ +{ + "name": "breachdirectory", + "category": "breaches", + "endpoint": "https://breachdirectory.com/api/search?key={BD_API_KEY}&email={target}", + "method": "GET", + 
"requires_auth": true, + "selectors": { + "found": "$.found" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{BD_API_KEY}" + ], + "input_type": "email", + "output_type": [ + "email" + ], + "normalization_map": {}, + "tags": [ + "passive", + "stealth" + ], + "health_check_url": "https://breachdirectory.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/censys_hosts.json b/sources/censys_hosts.json new file mode 100644 index 0000000..5274d84 --- /dev/null +++ b/sources/censys_hosts.json @@ -0,0 +1,33 @@ +{ + "name": "censys_hosts", + "category": "scanners", + "endpoint": "https://search.censys.io/api/v2/hosts/search?q={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "results": "$.result.hits[*].ip" + }, + "rate_limit": 1.0, + "headers": { + "Authorization": "Basic {CENSYS_AUTH_BASE64}" + }, + "api_key_slots": [ + "{CENSYS_AUTH_BASE64}" + ], + "input_type": "domain", + "output_type": [ + "ip" + ], + "normalization_map": { + "ip": "ip_address" + }, + "tags": [ + "passive", + "infrastructure" + ], + "health_check_url": "https://search.censys.io", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/checkleaked.json b/sources/checkleaked.json new file mode 100644 index 0000000..5e8d786 --- /dev/null +++ b/sources/checkleaked.json @@ -0,0 +1,30 @@ +{ + "name": "checkleaked", + "category": "breaches", + "endpoint": "https://api.checkleaked.cc/check/{target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "found": "$.found" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "email", + "output_type": [ + "email" + ], + "normalization_map": {}, + "tags": [ + "passive", + "stealth" + ], + "health_check_url": "https://api.checkleaked.cc", + "expected_status": 200, + "reliability_score": 
2, + "is_volatile": true, + "backup_endpoints": [ + "https://checkleaked.cc/api/check/{target}" + ], + "confidence": 0.55 +} \ No newline at end of file diff --git a/sources/circl_lu_pdns.json b/sources/circl_lu_pdns.json new file mode 100644 index 0000000..0098ea3 --- /dev/null +++ b/sources/circl_lu_pdns.json @@ -0,0 +1,30 @@ +{ + "name": "circl_lu_pdns", + "category": "dns_recon", + "endpoint": "https://www.circl.lu/pdns/query/{target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "resolutions": "$.[*].rdata" + }, + "rate_limit": 1.0, + "headers": { + "Authorization": "Basic {CIRCL_AUTH_BASE64}" + }, + "api_key_slots": [ + "{CIRCL_AUTH_BASE64}" + ], + "input_type": "domain", + "output_type": [ + "ip" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://www.circl.lu", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/cit0day.json b/sources/cit0day.json new file mode 100644 index 0000000..9a6e475 --- /dev/null +++ b/sources/cit0day.json @@ -0,0 +1,32 @@ +{ + "name": "cit0day", + "category": "breaches", + "endpoint": "https://cit0day.in/api/v1/search?query={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "results": "$.results" + }, + "rate_limit": 1.0, + "headers": { + "Authorization": "Bearer {CIT0DAY_API_KEY}" + }, + "api_key_slots": [ + "{CIT0DAY_API_KEY}" + ], + "input_type": "email", + "output_type": [ + "email" + ], + "normalization_map": {}, + "tags": [ + "passive", + "stealth" + ], + "health_check_url": "https://cit0day.in", + "expected_status": 200, + "reliability_score": 2, + "is_volatile": true, + "backup_endpoints": [], + "confidence": 0.55 +} \ No newline at end of file diff --git a/sources/clearbit_enrich.json b/sources/clearbit_enrich.json new file mode 100644 index 0000000..04f6699 --- /dev/null +++ b/sources/clearbit_enrich.json @@ -0,0 +1,33 @@ +{ + "name": 
"clearbit_enrich", + "category": "enrichment", + "endpoint": "https://person.clearbit.com/v2/people/find?email={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "full_name": "$.name.fullName" + }, + "rate_limit": 1.0, + "headers": { + "Authorization": "Bearer {CLEARBIT_API_KEY}" + }, + "api_key_slots": [ + "{CLEARBIT_API_KEY}" + ], + "input_type": "email", + "output_type": [ + "username", + "domain" + ], + "normalization_map": { + "fullName": "full_name" + }, + "tags": [ + "passive" + ], + "health_check_url": "https://person.clearbit.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/criminalip_asset.json b/sources/criminalip_asset.json new file mode 100644 index 0000000..15cf621 --- /dev/null +++ b/sources/criminalip_asset.json @@ -0,0 +1,33 @@ +{ + "name": "criminalip_asset", + "category": "scanners", + "endpoint": "https://api.criminalip.io/v1/asset/ip/report?ip={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "score": "$.score" + }, + "rate_limit": 1.0, + "headers": { + "x-api-key": "{CRIMINALIP_API_KEY}" + }, + "api_key_slots": [ + "{CRIMINALIP_API_KEY}" + ], + "input_type": "ip", + "output_type": [ + "ip" + ], + "normalization_map": { + "score": "risk_score" + }, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://api.criminalip.io", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/crt_sh.json b/sources/crt_sh.json new file mode 100644 index 0000000..e4e64e8 --- /dev/null +++ b/sources/crt_sh.json @@ -0,0 +1,31 @@ +{ + "name": "crt_sh", + "category": "certificate_transparency", + "endpoint": "https://crt.sh/?q={target}&output=json", + "method": "GET", + "requires_auth": false, + "selectors": { + "domains": "$.*.name_value" + }, + "rate_limit": 1.0, + "headers": { + "Accept": 
"application/json" + }, + "api_key_slots": [], + "input_type": "domain", + "output_type": [ + "domain" + ], + "normalization_map": { + "name_value": "domain" + }, + "tags": [ + "passive", + "fast" + ], + "health_check_url": "https://crt.sh", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/cve_search.json b/sources/cve_search.json new file mode 100644 index 0000000..242f5ae --- /dev/null +++ b/sources/cve_search.json @@ -0,0 +1,28 @@ +{ + "name": "cve_search", + "category": "vulns", + "endpoint": "https://cve.circl.lu/api/cve/{target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "summary": "$.summary" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "cve", + "output_type": [ + "cve" + ], + "normalization_map": { + "summary": "vuln_description" + }, + "tags": [ + "passive" + ], + "health_check_url": "https://cve.circl.lu", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/cxsecurity.json b/sources/cxsecurity.json new file mode 100644 index 0000000..489e323 --- /dev/null +++ b/sources/cxsecurity.json @@ -0,0 +1,27 @@ +{ + "name": "cxsecurity", + "category": "vulns", + "endpoint": "https://cxsecurity.com/cvejson.php?cve={target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "title": "$.title" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "cve", + "output_type": [ + "cve" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://cxsecurity.com", + "expected_status": 200, + "reliability_score": 3, + "is_volatile": true, + "backup_endpoints": [], + "confidence": 0.7 +} \ No newline at end of file diff --git a/sources/dehashed.json b/sources/dehashed.json new file mode 100644 index 0000000..7481b68 --- /dev/null +++ 
b/sources/dehashed.json @@ -0,0 +1,41 @@ +{ + "name": "dehashed", + "category": "breaches", + "endpoint": "https://api.dehashed.com/search?query={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "entries": "$.entries" + }, + "rate_limit": 1.0, + "headers": { + "Authorization": "Basic {DEHASHED_AUTH_BASE64}", + "Accept": "application/json" + }, + "api_key_slots": [ + "{DEHASHED_AUTH_BASE64}" + ], + "input_type": "email", + "output_type": [ + "email", + "username", + "ip" + ], + "normalization_map": { + "email": "email_address", + "username": "username", + "password": "plaintext_password", + "hashed_password": "password_hash", + "ip_address": "ip_address", + "name": "full_name" + }, + "tags": [ + "passive", + "stealth" + ], + "health_check_url": "https://api.dehashed.com", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/dnsdb_pdns.json b/sources/dnsdb_pdns.json new file mode 100644 index 0000000..81b8ff4 --- /dev/null +++ b/sources/dnsdb_pdns.json @@ -0,0 +1,30 @@ +{ + "name": "dnsdb_pdns", + "category": "dns_recon", + "endpoint": "https://api.dnsdb.info/lookup/rrset/name/{target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "rdata": "$.[*].rdata" + }, + "rate_limit": 1.0, + "headers": { + "X-API-Key": "{DNSDB_API_KEY}" + }, + "api_key_slots": [ + "{DNSDB_API_KEY}" + ], + "input_type": "domain", + "output_type": [ + "ip" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://api.dnsdb.info", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/domaintools_whois.json b/sources/domaintools_whois.json new file mode 100644 index 0000000..a9b564b --- /dev/null +++ b/sources/domaintools_whois.json @@ -0,0 +1,31 @@ +{ + "name": "domaintools_whois", + "category": "whois", + "endpoint": 
"https://api.domaintools.com/v1/{target}/whois/", + "method": "GET", + "requires_auth": true, + "selectors": { + "whois": "$.response.whois.record" + }, + "rate_limit": 1.0, + "headers": { + "Authorization": "Basic {DT_AUTH_BASE64}" + }, + "api_key_slots": [ + "{DT_AUTH_BASE64}" + ], + "input_type": "domain", + "output_type": [ + "email", + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://api.domaintools.com", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/duckduckgo_api.json b/sources/duckduckgo_api.json new file mode 100644 index 0000000..319195d --- /dev/null +++ b/sources/duckduckgo_api.json @@ -0,0 +1,27 @@ +{ + "name": "duckduckgo_api", + "category": "search", + "endpoint": "https://api.duckduckgo.com/?q={target}&format=json", + "method": "GET", + "requires_auth": false, + "selectors": { + "abstract": "$.Abstract" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "any", + "output_type": [ + "url" + ], + "normalization_map": {}, + "tags": [ + "passive", + "fast" + ], + "health_check_url": "https://api.duckduckgo.com", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/emailhippo.json b/sources/emailhippo.json new file mode 100644 index 0000000..31702f7 --- /dev/null +++ b/sources/emailhippo.json @@ -0,0 +1,29 @@ +{ + "name": "emailhippo", + "category": "email_rep", + "endpoint": "https://api.emailhippo.com/v3/verify?apiKey={HIPPO_API_KEY}&email={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "status": "$.meta.status" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{HIPPO_API_KEY}" + ], + "input_type": "email", + "output_type": [ + "email" + ], + "normalization_map": {}, + "tags": [ + "passive", + "fast" + ], + "health_check_url": 
"https://api.emailhippo.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/emailrep_io.json b/sources/emailrep_io.json new file mode 100644 index 0000000..25a9856 --- /dev/null +++ b/sources/emailrep_io.json @@ -0,0 +1,29 @@ +{ + "name": "emailrep_io", + "category": "email_rep", + "endpoint": "https://emailrep.io/{target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "reputation": "$.reputation" + }, + "rate_limit": 2.0, + "headers": {}, + "api_key_slots": [], + "input_type": "email", + "output_type": [ + "email" + ], + "normalization_map": { + "reputation": "email_reputation" + }, + "tags": [ + "passive", + "fast" + ], + "health_check_url": "https://emailrep.io", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/extreme_ip_lookup.json b/sources/extreme_ip_lookup.json new file mode 100644 index 0000000..ea73aa7 --- /dev/null +++ b/sources/extreme_ip_lookup.json @@ -0,0 +1,28 @@ +{ + "name": "extreme_ip_lookup", + "category": "geolocation", + "endpoint": "https://extreme-ip-lookup.com/json/{target}?key={EXTREME_API_KEY}", + "method": "GET", + "requires_auth": true, + "selectors": { + "org": "$.org" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{EXTREME_API_KEY}" + ], + "input_type": "ip", + "output_type": [ + "ip" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://extreme-ip-lookup.com", + "expected_status": 200, + "reliability_score": 3, + "backup_endpoints": [], + "confidence": 0.7 +} \ No newline at end of file diff --git a/sources/fofa_info.json b/sources/fofa_info.json new file mode 100644 index 0000000..7a43606 --- /dev/null +++ b/sources/fofa_info.json @@ -0,0 +1,31 @@ +{ + "name": "fofa_info", + "category": "scanners", + "endpoint": 
"https://fofa.info/api/v1/search/all?email={FOFA_EMAIL}&key={FOFA_API_KEY}&qbase64={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "results": "$.results" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{FOFA_API_KEY}", + "{FOFA_EMAIL}" + ], + "input_type": "domain", + "output_type": [ + "ip", + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive", + "infrastructure" + ], + "health_check_url": "https://fofa.info", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/fraudlabspro.json b/sources/fraudlabspro.json new file mode 100644 index 0000000..e4dacd2 --- /dev/null +++ b/sources/fraudlabspro.json @@ -0,0 +1,31 @@ +{ + "name": "fraudlabspro", + "category": "threat_intel", + "endpoint": "https://api.fraudlabspro.com/v1/ip/check?key={FLP_API_KEY}&ip={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "fraud": "$.fraudlabspro_score" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{FLP_API_KEY}" + ], + "input_type": "ip", + "output_type": [ + "ip" + ], + "normalization_map": { + "fraudlabspro_score": "fraud_score" + }, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://api.fraudlabspro.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/fullcontact.json b/sources/fullcontact.json new file mode 100644 index 0000000..0432fbd --- /dev/null +++ b/sources/fullcontact.json @@ -0,0 +1,34 @@ +{ + "name": "fullcontact", + "category": "enrichment", + "endpoint": "https://api.fullcontact.com/v3/person.enrich", + "method": "POST", + "requires_auth": true, + "selectors": { + "social": "$.socialProfiles" + }, + "rate_limit": 1.0, + "headers": { + "Authorization": "Bearer {FULLCONTACT_API_KEY}" + }, + "payload_template": { + "email": "{target}" + }, + 
"api_key_slots": [ + "{FULLCONTACT_API_KEY}" + ], + "input_type": "email", + "output_type": [ + "username", + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://api.fullcontact.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/github_code_search.json b/sources/github_code_search.json new file mode 100644 index 0000000..f78620b --- /dev/null +++ b/sources/github_code_search.json @@ -0,0 +1,30 @@ +{ + "name": "github_code_search", + "category": "code", + "endpoint": "https://api.github.com/search/code?q={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "urls": "$.items[*].html_url" + }, + "rate_limit": 1.0, + "headers": { + "Authorization": "token {GITHUB_TOKEN}" + }, + "api_key_slots": [ + "{GITHUB_TOKEN}" + ], + "input_type": "any", + "output_type": [ + "url" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://api.github.com", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/github_search_repos.json b/sources/github_search_repos.json new file mode 100644 index 0000000..c73fe3a --- /dev/null +++ b/sources/github_search_repos.json @@ -0,0 +1,30 @@ +{ + "name": "github_search_repos", + "category": "social", + "endpoint": "https://api.github.com/search/repositories?q={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "total": "$.total_count" + }, + "rate_limit": 1.0, + "headers": { + "Authorization": "token {GITHUB_TOKEN}" + }, + "api_key_slots": [ + "{GITHUB_TOKEN}" + ], + "input_type": "username", + "output_type": [ + "username" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://api.github.com", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 
1.0 +} \ No newline at end of file diff --git a/sources/github_users.json b/sources/github_users.json new file mode 100644 index 0000000..0e98c5a --- /dev/null +++ b/sources/github_users.json @@ -0,0 +1,31 @@ +{ + "name": "github_users", + "category": "social", + "endpoint": "https://api.github.com/users/{target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "bio": "$.bio", + "blog": "$.blog" + }, + "rate_limit": 2.0, + "headers": { + "User-Agent": "NOX" + }, + "api_key_slots": [], + "input_type": "username", + "output_type": [ + "username", + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive", + "fast" + ], + "health_check_url": "https://api.github.com", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/gitlab_search.json b/sources/gitlab_search.json new file mode 100644 index 0000000..2c3919d --- /dev/null +++ b/sources/gitlab_search.json @@ -0,0 +1,26 @@ +{ + "name": "gitlab_search", + "category": "social", + "endpoint": "https://gitlab.com/api/v4/users?username={target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "id": "$.[*].id" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "username", + "output_type": [ + "username" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://gitlab.com", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/google_safebrowsing.json b/sources/google_safebrowsing.json new file mode 100644 index 0000000..979f4af --- /dev/null +++ b/sources/google_safebrowsing.json @@ -0,0 +1,52 @@ +{ + "name": "google_safebrowsing", + "category": "threat_intel", + "endpoint": "https://safebrowsing.googleapis.com/v4/threatMatches:find?key={GOOGLE_API_KEY}", + "method": "POST", + "requires_auth": true, + "selectors": { + "matches": 
"$.matches" + }, + "rate_limit": 1.0, + "headers": {}, + "payload_template": { + "client": { + "clientId": "nox", + "clientVersion": "1.0" + }, + "threatInfo": { + "threatTypes": [ + "MALWARE", + "SOCIAL_ENGINEERING" + ], + "platformTypes": [ + "ANY_PLATFORM" + ], + "threatEntryTypes": [ + "URL" + ], + "threatEntries": [ + { + "url": "{target}" + } + ] + } + }, + "api_key_slots": [ + "{GOOGLE_API_KEY}" + ], + "input_type": "url", + "output_type": [ + "url" + ], + "normalization_map": {}, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://safebrowsing.googleapis.com", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/google_search_custom.json b/sources/google_search_custom.json new file mode 100644 index 0000000..8c50aa7 --- /dev/null +++ b/sources/google_search_custom.json @@ -0,0 +1,29 @@ +{ + "name": "google_search_custom", + "category": "search", + "endpoint": "https://www.googleapis.com/customsearch/v1?key={GOOGLE_CX_KEY}&cx={GOOGLE_CX_ID}&q={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "items": "$.items[*].link" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{GOOGLE_CX_KEY}", + "{GOOGLE_CX_ID}" + ], + "input_type": "any", + "output_type": [ + "url" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://www.googleapis.com", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/gravatar.json b/sources/gravatar.json new file mode 100644 index 0000000..9e4a674 --- /dev/null +++ b/sources/gravatar.json @@ -0,0 +1,26 @@ +{ + "name": "gravatar", + "category": "social", + "endpoint": "https://www.gravatar.com/{target}.json", + "method": "GET", + "requires_auth": false, + "selectors": { + "name": "$.entry[0].displayName" + }, + "rate_limit": 2.0, + "headers": {}, + 
"api_key_slots": [], + "input_type": "email", + "output_type": [ + "username" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://www.gravatar.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/greynoise_community.json b/sources/greynoise_community.json new file mode 100644 index 0000000..5c11b02 --- /dev/null +++ b/sources/greynoise_community.json @@ -0,0 +1,35 @@ +{ + "name": "greynoise_community", + "category": "threat_intel", + "endpoint": "https://api.greynoise.io/v3/community/{target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "noise": "$.noise", + "classification": "$.classification" + }, + "rate_limit": 1.0, + "headers": { + "key": "{GREYNOISE_API_KEY}" + }, + "api_key_slots": [ + "{GREYNOISE_API_KEY}" + ], + "input_type": "ip", + "output_type": [ + "ip" + ], + "normalization_map": { + "noise": "is_noise", + "classification": "threat_class" + }, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://api.greynoise.io", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/hackernews_user.json b/sources/hackernews_user.json new file mode 100644 index 0000000..d58f4e2 --- /dev/null +++ b/sources/hackernews_user.json @@ -0,0 +1,27 @@ +{ + "name": "hackernews_user", + "category": "social", + "endpoint": "https://hacker-news.firebaseio.com/v0/user/{target}.json", + "method": "GET", + "requires_auth": false, + "selectors": { + "karma": "$.karma" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "username", + "output_type": [ + "username" + ], + "normalization_map": {}, + "tags": [ + "passive", + "fast" + ], + "health_check_url": "https://hacker-news.firebaseio.com", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + 
"confidence": 1.0 +} \ No newline at end of file diff --git a/sources/hackertarget_dnslookup.json b/sources/hackertarget_dnslookup.json new file mode 100644 index 0000000..28ce800 --- /dev/null +++ b/sources/hackertarget_dnslookup.json @@ -0,0 +1,27 @@ +{ + "name": "hackertarget_dnslookup", + "category": "dns_recon", + "endpoint": "https://api.hackertarget.com/dnslookup/?q={target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "records": "text_lines" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "domain", + "output_type": [ + "ip" + ], + "normalization_map": {}, + "tags": [ + "passive", + "fast" + ], + "health_check_url": "https://api.hackertarget.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/hackertarget_hostsearch.json b/sources/hackertarget_hostsearch.json new file mode 100644 index 0000000..0ea6657 --- /dev/null +++ b/sources/hackertarget_hostsearch.json @@ -0,0 +1,28 @@ +{ + "name": "hackertarget_hostsearch", + "category": "dns_recon", + "endpoint": "https://api.hackertarget.com/hostsearch/?q={target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "hosts": "text_lines" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "domain", + "output_type": [ + "ip", + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive", + "fast" + ], + "health_check_url": "https://api.hackertarget.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/hackertarget_reverseip.json b/sources/hackertarget_reverseip.json new file mode 100644 index 0000000..5c19499 --- /dev/null +++ b/sources/hackertarget_reverseip.json @@ -0,0 +1,26 @@ +{ + "name": "hackertarget_reverseip", + "category": "dns_recon", + "endpoint": 
"https://api.hackertarget.com/reverseiplookup/?q={target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "domains": "text_lines" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "ip", + "output_type": [ + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://api.hackertarget.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/hackertarget_whois.json b/sources/hackertarget_whois.json new file mode 100644 index 0000000..04e0f32 --- /dev/null +++ b/sources/hackertarget_whois.json @@ -0,0 +1,27 @@ +{ + "name": "hackertarget_whois", + "category": "whois", + "endpoint": "https://api.hackertarget.com/whois/?q={target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "raw": "text_lines" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "domain", + "output_type": [ + "email", + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://api.hackertarget.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/hashes_org.json b/sources/hashes_org.json new file mode 100644 index 0000000..fb4714c --- /dev/null +++ b/sources/hashes_org.json @@ -0,0 +1,28 @@ +{ + "name": "hashes_org", + "category": "hashes", + "endpoint": "https://hashes.org/api.php?key={HASHES_API_KEY}&query={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "found": "$.results" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{HASHES_API_KEY}" + ], + "input_type": "hash", + "output_type": [ + "hash" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://hashes.org", + "expected_status": 200, + "reliability_score": 3, + "backup_endpoints": [], + 
"confidence": 0.7 +} \ No newline at end of file diff --git a/sources/helpers/__init__.py b/sources/helpers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sources/helpers/config_handler.py b/sources/helpers/config_handler.py new file mode 100644 index 0000000..50c8f55 --- /dev/null +++ b/sources/helpers/config_handler.py @@ -0,0 +1,243 @@ +""" +sources/helpers/config_handler.py — NOX Framework +Unified credential management via ~/.config/nox-cli/apikeys.json (XDG). + +Priority: environment variable → apikeys.json → None +""" +from __future__ import annotations + +import json +import os +from pathlib import Path +from typing import Dict, Optional + +# ── Shared constant — import this everywhere instead of a raw string ─── +UNIVERSAL_PLACEHOLDER = "INSERT_API_KEY_HERE" + +# ── XDG config path ──────────────────────────────────────────────────── +_CONFIG_DIR = Path(os.environ.get("XDG_CONFIG_HOME", Path.home() / ".config")) / "nox-cli" +_APIKEYS_FILE = _CONFIG_DIR / "apikeys.json" + +# ── Complete service registry ────────────────────────────────────────── +# Format: key_name → {"display": str, "public": bool} +# public=True → no key needed, always active +# public=False → requires a real API key (goes into apikeys.json) +SERVICE_REGISTRY: Dict[str, Dict] = { + # ── Public / keyless ────────────────────────────────────────────── + "alienvault_otx_domain": {"display": "AlienVault OTX (Domain)", "public": True}, + "alienvault_otx_ip": {"display": "AlienVault OTX (IP)", "public": True}, + "alienvault_otx_malware": {"display": "AlienVault OTX (Malware)", "public": True}, + "alienvault_otx_user": {"display": "AlienVault OTX (User)", "public": True}, + "anubis_subdomains": {"display": "Anubis Subdomains", "public": True}, + "bgpview_ip": {"display": "BGPView IP", "public": True}, + "checkleaked": {"display": "CheckLeaked", "public": True}, + "crt_sh": {"display": "crt.sh", "public": True}, + "cve_search": {"display": "CVE Search", "public": True}, + 
"cxsecurity": {"display": "CXSecurity", "public": True}, + "duckduckgo_api": {"display": "Google / DDG Dorks", "public": True}, + "emailrep_io": {"display": "EmailRep.io", "public": True}, + "github_users": {"display": "GitHub Users", "public": True}, + "gitlab_search": {"display": "GitLab Search", "public": True}, + "gravatar": {"display": "Gravatar", "public": True}, + "hackernews_user": {"display": "HackerNews User", "public": True}, + "hackertarget_dnslookup": {"display": "HackerTarget DNS Lookup", "public": True}, + "hackertarget_hostsearch": {"display": "HackerTarget Host Search", "public": True}, + "hackertarget_reverseip": {"display": "HackerTarget Reverse IP", "public": True}, + "hackertarget_whois": {"display": "WHOIS (HackerTarget)", "public": True}, + "hudsonrock_osint": {"display": "HudsonRock OSINT", "public": True}, + "ipapi_co": {"display": "ipapi.co", "public": True}, + "ipinfo_io": {"display": "IPInfo.io", "public": True}, + "ipvigilante": {"display": "IPVigilante", "public": True}, + "keybase_lookup": {"display": "Keybase Lookup", "public": True}, + "keybase_proofs": {"display": "Keybase Proofs", "public": True}, + "maltiverse_ip": {"display": "Maltiverse IP", "public": True}, + "npm_user": {"display": "NPM User", "public": True}, + "packetstorm": {"display": "PacketStorm", "public": True}, + "phishtank_check": {"display": "PhishTank", "public": True}, + "pulsedive": {"display": "Pulsedive (Free)", "public": True}, + "pypi_user": {"display": "PyPI User", "public": True}, + "reddit_user": {"display": "Reddit User", "public": True}, + "robtex_ip": {"display": "Robtex IP", "public": True}, + "scamwatcher": {"display": "ScamWatcher", "public": True}, + "social_scan": {"display": "Social Scan", "public": True}, + "sublist3r_api": {"display": "Sublist3r API", "public": True}, + "threatcrowd_domain": {"display": "ThreatCrowd (Domain)", "public": True}, + "threatcrowd_email": {"display": "ThreatCrowd (Email)", "public": True}, + "threatminer_domain": 
{"display": "ThreatMiner (Domain)", "public": True}, + "threatminer_ip": {"display": "ThreatMiner (IP)", "public": True}, + "urlscan_search": {"display": "URLScan.io", "public": True}, + "vigilante_pw": {"display": "Vigilante.pw", "public": True}, + "wayback_machine": {"display": "Wayback Machine", "public": True}, + # ── Private / key-required ──────────────────────────────────────── + "ABSTRACT_API_KEY": {"display": "Abstract Email Validation", "public": False}, + "ABUSEIPDB_API_KEY": {"display": "AbuseIPDB", "public": False}, + "ANYRUN_API_KEY": {"display": "Any.run", "public": False}, + "BA_API_KEY": {"display": "BreachAware", "public": False}, + "BD_API_KEY": {"display": "BreachDirectory", "public": False}, + "BINARYEDGE_API_KEY": {"display": "BinaryEdge", "public": False}, + "BING_API_KEY": {"display": "Bing Search API", "public": False}, + "CENSYS_AUTH_BASE64": {"display": "Censys", "public": False}, + "CIRCL_AUTH_BASE64": {"display": "CIRCL.lu PDNS", "public": False}, + "CIT0DAY_API_KEY": {"display": "Cit0day", "public": False}, + "CLEARBIT_API_KEY": {"display": "Clearbit Enrich", "public": False}, + "CRIMINALIP_API_KEY": {"display": "CriminalIP", "public": False}, + "DEHASHED_AUTH_BASE64": {"display": "Dehashed", "public": False}, + "DNSDB_API_KEY": {"display": "DNSDB Passive DNS", "public": False}, + "DT_AUTH_BASE64": {"display": "DomainTools WHOIS", "public": False}, + "EXTREME_API_KEY": {"display": "Extreme IP Lookup", "public": False}, + "FLP_API_KEY": {"display": "FraudLabsPro", "public": False}, + "FOFA_API_KEY": {"display": "FOFA", "public": False}, + "FOFA_EMAIL": {"display": "FOFA (account email)", "public": False}, + "FULLCONTACT_API_KEY": {"display": "FullContact", "public": False}, + "GITHUB_TOKEN": {"display": "GitHub (Code/Repo Search)", "public": False}, + "GOOGLE_API_KEY": {"display": "Google Safe Browsing", "public": False}, + "GOOGLE_CX_KEY": {"display": "Google Custom Search (API key)", "public": False}, + "GOOGLE_CX_ID": {"display": 
"Google Custom Search (CX ID)", "public": False}, + "GREYNOISE_API_KEY": {"display": "GreyNoise", "public": False}, + "HASHES_API_KEY": {"display": "Hashes.org", "public": False}, + "HIBP_API_KEY": {"display": "HaveIBeenPwned", "public": False}, + "HIPPO_API_KEY": {"display": "EmailHippo", "public": False}, + "HUNTER_API_KEY": {"display": "Hunter.io", "public": False}, + "HYBRID_API_KEY": {"display": "Hybrid Analysis", "public": False}, + "INTELX_API_KEY": {"display": "IntelX", "public": False}, + "INTEZER_API_KEY": {"display": "Intezer", "public": False}, + "IPDATA_API_KEY": {"display": "IPData.co", "public": False}, + "IPGEO_API_KEY": {"display": "IPGeolocation.io", "public": False}, + "IPINFODB_API_KEY": {"display": "IPInfoDB", "public": False}, + "IPQS_API_KEY": {"display": "IPQualityScore", "public": False}, + "IPSTACK_API_KEY": {"display": "IPStack", "public": False}, + "JOE_API_KEY": {"display": "Joe Sandbox", "public": False}, + "LEAKCHECK_API_KEY": {"display": "LeakCheck", "public": False}, + "LEAKIX_API_KEY": {"display": "LeakIX", "public": False}, + "LEAKSTATS_API_KEY": {"display": "LeakStats.pw", "public": False}, + "MAILBOX_API_KEY": {"display": "Mailboxlayer", "public": False}, + "MALSHARE_API_KEY": {"display": "MalShare", "public": False}, + "METADEFENDER_API_KEY": {"display": "MetaDefender", "public": False}, + "MISP_API_KEY": {"display": "MISP", "public": False}, + "NUMVERIFY_API_KEY": {"display": "Numverify", "public": False}, + "ONYPHE_API_KEY": {"display": "Onyphe", "public": False}, + "PASSIVETOTAL_AUTH_BASE64": {"display": "PassiveTotal / RiskIQ", "public": False}, + "PIPL_API_KEY": {"display": "Pipl", "public": False}, + "PULSEDIVE_API_KEY": {"display": "Pulsedive (Premium)", "public": False}, + "RF_TOKEN": {"display": "Recorded Future", "public": False}, + "SECURITYTRAILS_API_KEY": {"display": "SecurityTrails", "public": False}, + "SHODAN_API_KEY": {"display": "Shodan", "public": False}, + "SNUSBASE_API_KEY": {"display": "Snusbase", 
# Registry entries that require a real API key; only these are persisted
# to apikeys.json.
_PRIVATE_KEYS = {k: v for k, v in SERVICE_REGISTRY.items() if not v["public"]}


# ── Store helpers ──────────────────────────────────────────────────────

def _default_store() -> Dict[str, str]:
    """Return a fresh dict mapping every private key to UNIVERSAL_PLACEHOLDER."""
    return dict.fromkeys(_PRIVATE_KEYS, UNIVERSAL_PLACEHOLDER)


def _write_store(data: Dict[str, str]) -> None:
    """Atomically write *data* to apikeys.json with owner-only permissions.

    The payload is written to a sibling ``.tmp`` file and then moved over the
    real file, so a reader never observes a half-written store.

    Raises:
        RuntimeError: if the config directory or file cannot be written
            (wraps the underlying PermissionError).
    """
    payload = json.dumps(data, indent=4, sort_keys=True)
    try:
        _CONFIG_DIR.mkdir(mode=0o700, parents=True, exist_ok=True)
        _CONFIG_DIR.chmod(0o700)
        tmp = _APIKEYS_FILE.with_suffix(".tmp")
        # Create the temp file as 0600 from the first byte: write_text() would
        # honour the process umask and could briefly expose the API keys.
        fd = os.open(tmp, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w", encoding="utf-8") as fh:
            fh.write(payload)
        # The mode passed to os.open is ignored when a stale tmp already exists.
        tmp.chmod(0o600)
        tmp.replace(_APIKEYS_FILE)
        _APIKEYS_FILE.chmod(0o600)
    except PermissionError as exc:
        raise RuntimeError(f"[config_handler] Cannot write {_APIKEYS_FILE}: {exc}") from exc


def _load_store() -> Dict[str, str]:
    """Load apikeys.json, creating it with defaults if absent.

    Self-heals on corrupt files: an empty file, invalid JSON, a non-object
    root, or bytes that are not valid UTF-8 cause the file to be moved to
    ``apikeys.json.bak`` and replaced by the default store.

    Returns:
        The key → value mapping, back-filled with placeholders for any
        private key that the file on disk does not contain yet.

    Raises:
        RuntimeError: if the file cannot be read or rewritten.
    """
    _CONFIG_DIR.mkdir(mode=0o700, parents=True, exist_ok=True)
    _CONFIG_DIR.chmod(0o700)
    if not _APIKEYS_FILE.exists():
        print(" \033[92m[+]\033[0m Initializing NOX Environment in ~/.config/nox-cli/")
        _write_store(_default_store())
        return _default_store()
    try:
        text = _APIKEYS_FILE.read_text(encoding="utf-8").strip()
        if not text:
            raise json.JSONDecodeError("Empty file", "", 0)
        data = json.loads(text)
        if not isinstance(data, dict):
            raise json.JSONDecodeError("Root is not a JSON object", text, 0)
        # Back-fill keys added in newer versions
        missing = {k: UNIVERSAL_PLACEHOLDER for k in _PRIVATE_KEYS if k not in data}
        if missing:
            data.update(missing)
            _write_store(data)
        return data
    except (json.JSONDecodeError, UnicodeDecodeError):
        # UnicodeDecodeError: a file with invalid UTF-8 is just as corrupt as
        # one with invalid JSON and must take the same recovery path.
        bak = _APIKEYS_FILE.with_suffix(".json.bak")
        # replace() overwrites an older backup; rename() fails on Windows
        # when the destination already exists.
        _APIKEYS_FILE.replace(bak)
        print(f"[!] Malformed apikeys.json detected — backed up to {bak.name} and reset to defaults.")
        defaults = _default_store()
        _write_store(defaults)
        return defaults
    except PermissionError as exc:
        raise RuntimeError(f"[config_handler] Cannot read {_APIKEYS_FILE}: {exc}") from exc


# ── ConfigManager ──────────────────────────────────────────────────────

class ConfigManager:
    """
    Unified API key manager.

    Resolution order per key:
      1. Environment variable (exact key name)
      2. ~/.config/nox-cli/apikeys.json
      3. Returns None if value equals UNIVERSAL_PLACEHOLDER or is absent

    Resolved values are memoised per key for the lifetime of the process.
    """

    # Memoised get_key() results, shared by all callers (class-level on purpose).
    _cache: Dict[str, Optional[str]] = {}
    # Lazily loaded contents of apikeys.json; None until first access.
    _store: Optional[Dict[str, str]] = None

    @classmethod
    def _get_store(cls) -> Dict[str, str]:
        """Return the on-disk store, loading it on first use."""
        if cls._store is None:
            cls._store = _load_store()
        return cls._store

    @classmethod
    def get_key(cls, key_name: str) -> Optional[str]:
        """Return the configured value, or None if missing/placeholder."""
        try:
            return cls._cache[key_name]
        except KeyError:
            pass
        val = os.environ.get(key_name, "") or cls._get_store().get(key_name, "")
        result = None if (not val or val == UNIVERSAL_PLACEHOLDER) else val
        cls._cache[key_name] = result
        return result

    # Backward-compatible alias used by nox.py internals
    get = get_key

    @classmethod
    def set(cls, key_name: str, value: str) -> None:
        """Persist a key to apikeys.json and invalidate its cached resolution.

        The cache entry is dropped rather than overwritten so the next
        get_key() re-applies the full resolution order: an environment
        variable still wins over the stored value, and an empty string
        resolves to None instead of being returned as "".
        """
        store = cls._get_store()
        store[key_name] = value
        _write_store(store)
        cls._cache.pop(key_name, None)

    @classmethod
    def config_path(cls) -> Path:
        """Return the path of the apikeys.json file."""
        return _APIKEYS_FILE
# Ordered most-specific first. A 32-char hex string is reported as "md5";
# NTLM hashes have the same shape and are covered by the extra NTLM lookup
# that async_crack adds for 32-char hashes.
_PATTERNS: List[Tuple[str, re.Pattern]] = [
    ("bcrypt", re.compile(r"^\$2[aby]?\$\d{2}\$.{53}$")),
    ("sha256", re.compile(r"^[a-f0-9]{64}$", re.I)),
    ("sha1", re.compile(r"^[a-f0-9]{40}$", re.I)),
    ("md5", re.compile(r"^[a-f0-9]{32}$", re.I)),
]

# Diagnostics go to the "nox.system" logger, not to the terminal.
_syslog = logging.getLogger("nox.system")

# Per-API timeout — each individual rainbow-table query budget
_API_TIMEOUT = 8
# Global crack budget — hard cap regardless of API count or response order
CRACK_TIMEOUT = 20

# Lower-cased prefixes that mark a plain-text response as "no result".
_ERROR_PREFIXES = (
    "not found", "error", "invalid", "no result", "not in",
    "cmd5-error", "not exist", "code erreur", "erreur", "unknown",
)


def detect_hash(value: str) -> Optional[str]:
    """Return the hash type name if *value* looks like a known hash, else None.

    Surrounding whitespace is ignored. Non-string input returns None instead
    of raising, so arbitrary record fields can be passed in.
    """
    if not isinstance(value, str):
        return None
    candidate = value.strip()
    return next((name for name, rx in _PATTERNS if rx.match(candidate)), None)


async def _query_api(session, url: str, fmt: str) -> Optional[str]:
    """Run a single lookup and return the plaintext, or None. Never raises.

    Args:
        session: object exposing an aiohttp-style ``get(url, timeout=...)``.
        url: fully built lookup URL.
        fmt: "text" to treat the body as the plaintext itself; anything else
            parses the body as JSON and reads result/plaintext/plain.
    """
    try:
        import aiohttp  # optional dependency; a missing module means "no result"
        to = aiohttp.ClientTimeout(total=_API_TIMEOUT)
        async with session.get(url, timeout=to) as resp:
            if resp.status != 200:
                return None
            if fmt == "text":
                text = (await resp.text()).strip()
                # Reject empty, too-long, or obvious error responses
                if not text or len(text) > 128:
                    return None
                if text.lower().startswith(_ERROR_PREFIXES):
                    return None
                return text
            data = await resp.json(content_type=None)
            if not isinstance(data, dict):
                return None
            for key in ("result", "plaintext", "plain"):
                found = data.get(key)
                # A numeric password may arrive as a JSON number.
                if isinstance(found, int) and not isinstance(found, bool):
                    found = str(found)
                # Only real strings count: flags such as `true` are not plaintext.
                if isinstance(found, str) and found:
                    return found
            return None
    except Exception:
        # Best-effort lookup: record the failure for diagnostics, keep going.
        _syslog.debug("crack API query failed: %s", url, exc_info=True)
        return None


async def async_crack(session, hash_value: str, hash_type: str) -> Optional[str]:
    """
    Query multiple rainbow-table APIs concurrently.
    Returns first plaintext found, or None. bcrypt is skipped.

    For 32-char hex hashes (md5/ntlm ambiguity) an NTLM-specific lookup is
    queried as well.

    Per-API timeout: 8s (_API_TIMEOUT, enforced inside _query_api).
    Global budget: 20s (CRACK_TIMEOUT, enforced here).
    All remaining tasks are cancelled as soon as the first result is found,
    the budget is exhausted, or this coroutine itself is cancelled.
    """
    if hash_type == "bcrypt":
        return None

    h = hash_value.strip().lower()
    apis = [
        (f"https://www.nitrxgen.net/md5db/{h}", "text"),
        (f"https://hashes.com/en/api/hash?hash={h}", "json"),
        (f"https://hash.help/api/lookup/{h}", "json"),
        (f"https://hashkiller.io/api/search.php?hash={h}", "json"),
        (f"https://md5decrypt.net/Api/api.php?hash={h}&hash_type={hash_type}&email=&code=", "text"),
        (f"https://www.cmd5.org/api.ashx?hash={h}", "text"),
    ]
    if hash_type == "md5" and len(h) == 32:
        apis.append((f"https://hashes.com/en/api/hash?hash={h}&type=ntlm", "json"))

    # Explicit tasks so the losers can be cancelled once a winner is known.
    tasks = [asyncio.create_task(_query_api(session, url, fmt)) for url, fmt in apis]
    result: Optional[str] = None
    try:
        # as_completed applies ONE deadline to the whole batch; once it has
        # passed, awaiting the next item raises TimeoutError.
        for fut in asyncio.as_completed(tasks, timeout=CRACK_TIMEOUT):
            try:
                res = await fut
            except asyncio.TimeoutError:
                break  # global budget exhausted
            except Exception:
                continue  # one failing API must not stop the others
            if res:
                result = res
                break
    finally:
        # Also runs when the caller is cancelled: CancelledError is allowed to
        # propagate, but no lookup task is left pending behind it.
        for t in tasks:
            if not t.done():
                t.cancel()
        await asyncio.gather(*tasks, return_exceptions=True)
    return result
+ r"|Task exception|NoneType|Object of type)", + re.I, +) +_CTRL_RE = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x9f]") + + +def _nox_ver() -> str: + try: + from nox import VERSION # type: ignore + return VERSION + except ImportError: + return "1.0.0" + + +def _clean(v: Any, maxlen: int = 200) -> str: + """Strip control chars, technical noise, HTML-escape, truncate.""" + s = str(v) if v is not None else "" + s = _CTRL_RE.sub("", s) + if _NOISE_RE.search(s): + return "" + return _html.escape(s[:maxlen]) + + +def _raw(v: Any, maxlen: int = 200) -> str: + """Strip control chars only — no HTML escaping (PDF / plain-text paths).""" + s = str(v) if v is not None else "" + s = _CTRL_RE.sub("", s) + if _NOISE_RE.search(s): + return "" + return s[:maxlen] + + +def _pdf_safe(s: str, maxlen: int = 180) -> str: + # D4: sanitize for fpdf2 core fonts (latin-1 subset). + # NFKD normalization decomposes accented chars (é→e + combining accent) + # so common accented Latin characters survive as their base letter. + # Truly non-latin-1 chars (Cyrillic, CJK, etc.) become '?' — intentional: + # fpdf2 core fonts cannot render them and would raise UnicodeEncodeError. + s = _raw(s, maxlen) + try: + import unicodedata + normalized = unicodedata.normalize("NFKD", s) + return normalized.encode("ascii", errors="replace").decode("ascii") + except Exception: + return s.encode("latin-1", errors="replace").decode("latin-1") + + +def _rget(r: Any, k: str) -> str: + if isinstance(r, dict): + return str(r.get(k, "") or "") + return str(getattr(r, k, "") or "") + + +# ── Executive summary builder ───────────────────────────────────────── + +def build_exec_summary(data: dict) -> dict: + """ + Returns a dict with all dashboard KPIs needed by every format. + Expects data keys: records, analysis, scan_meta (optional). 
+ """ + records = data.get("records", []) + meta = data.get("scan_meta", {}) or {} + analysis = data.get("analysis", {}) or {} + + cleartext = sum(1 for r in records if _rget(r, "password")) + nodes = len({_rget(r, "email") or _rget(r, "username") for r in records} - {""}) + elapsed = meta.get("elapsed_seconds") + depth = meta.get("pivot_depth", len(data.get("pivot_chain", []))) + + buckets: Dict[str, int] = {"Critical": 0, "High": 0, "Medium": 0, "Low": 0, "Info": 0} + for r in records: + rs = float(_rget(r, "risk_score") or 0) + if rs >= 90: buckets["Critical"] += 1 + elif rs >= 70: buckets["High"] += 1 + elif rs >= 40: buckets["Medium"] += 1 + elif rs >= 10: buckets["Low"] += 1 + else: buckets["Info"] += 1 + + return { + "total_records": len(records), + "nodes_discovered": nodes, + "cleartext_passwords": cleartext, + "pivot_depth": depth, + "elapsed": f"{elapsed:.1f}s" if elapsed is not None else "N/A", + "buckets": buckets, + "hvt_count": analysis.get("hvt_count", sum(1 for r in records if getattr(r, "is_hvt", False))), + } + + +# ── Pivot chain renderer ────────────────────────────────────────────── + +def render_pivot_chain(data: dict) -> List[str]: + """ + Build a human-readable pivot chain. + D2: check pivot_log first before falling back to record-based reconstruction. 
+ """ + chain = data.get("pivot_chain") or [] + target = _raw(data.get("target", "?")) + + # D2: if pivot_log is available, build chain from it (accurate tree) + pivot_log = data.get("pivot_log") or [] + if pivot_log: + lines: List[str] = [] + for e in pivot_log: + depth = e.get("depth", 0) + asset = _raw(e.get("asset", "")) + phase = _raw(e.get("found_in", e.get("source", "?"))) + parent = _raw(e.get("parent") or "") + prefix = " " * depth + if depth == 0: + lines.append(f"[SEED] {asset}") + else: + lines.append(f"{prefix}└─ [{phase}] {asset} ← {parent}") + return lines if lines else [f"[SEED] {target} (no pivot data)"] + + if len(chain) <= 1: + # No pivot data — reconstruct best-effort from records + records = data.get("records", []) + lines = [f"[SEED] {target}"] + seen: set = {target.lower()} + for r in records[:40]: + src = _raw(_rget(r, "source")) + em = _raw(_rget(r, "email")) + usr = _raw(_rget(r, "username")) + ident = em or usr + if not ident or ident.lower() in seen: + continue + seen.add(ident.lower()) + lines.append(f" └─ [{src}] → {ident}") + dork_results = data.get("dork_results") or [] + for d in dork_results[:5]: + url = _raw(d.get("url", "")) + if url and url.lower() not in seen: + seen.add(url.lower()) + lines.append(f" └─ [Dork] → {url[:80]}") + return lines if len(lines) > 1 else [f"[SEED] {target} (no pivot data)"] + + # Ordered pivot chain from AvalancheScanner + lines = [f"[SEED] {_raw(chain[0])}"] + for node in chain[1:]: + lines.append(f" └─ [Pivot] → {_raw(node)}") + return lines + + +# ── JSON report ─────────────────────────────────────────────────────── + +def to_json(data: dict, path: str) -> None: + summary = build_exec_summary(data) + chain = render_pivot_chain(data) + records = data.get("records", []) + + def _ser(o): + try: + from enum import Enum + if isinstance(o, Enum): + return o.name + except ImportError: + pass + if hasattr(o, "to_dict"): + return o.to_dict() + return str(o) + + clean_records = [] + for r in records: + d = 
def to_json(data: dict, path: str) -> None:
    """
    Write the scan result as a self-describing JSON report to ``path``.

    The report contains the executive summary, the rendered pivot chain, the
    records (noise fields removed, capped at 1000 like the HTML report), and
    the dork and scrape results.  A single timestamp is captured up front so
    ``generated``, ``_meta.timestamp`` and the ``scan_id`` seed all agree.
    """
    from enum import Enum  # stdlib; local import keeps file-level imports untouched

    summary = build_exec_summary(data)
    chain = render_pivot_chain(data)
    records = data.get("records") or []
    target = data.get("target", "")
    generated = datetime.now().isoformat()

    def _ser(o):
        # json.dumps fallback for objects it cannot encode natively.
        if isinstance(o, Enum):
            return o.name
        if hasattr(o, "to_dict"):
            return o.to_dict()
        return str(o)

    clean_records = []
    for r in records:
        if hasattr(r, "to_dict"):
            d = r.to_dict()
        else:
            d = r if isinstance(r, dict) else {}
        # drop noise fields
        clean_records.append({
            k: v for k, v in d.items()
            if k not in ("raw_data", "metadata") and not _NOISE_RE.search(str(v or ""))
        })

    try:
        from nox import VERSION as _NOX_VERSION  # type: ignore
    except ImportError:
        _NOX_VERSION = "1.0.0"

    # Include dork and scrape results in JSON output
    dork_results = data.get("dork_results", []) or []
    scrape_results = data.get("scrape_results", {}) or {}

    # D3: apply consistent cap (1000) — same as HTML
    _RECORD_CAP = 1000

    out_data = {
        "framework": f"NOX v{_NOX_VERSION}",
        "generated": generated,
        "target": target,
        # J3: self-describing metadata block
        "_meta": {
            "scan_id": hashlib.sha256(f"{target}{generated}".encode()).hexdigest()[:16],
            "target": target,
            "timestamp": generated,
            "nox_version": _NOX_VERSION,
            # NOTE(review): this reports the record count, not a count of
            # sources — kept as-is for output compatibility; confirm intent.
            "sources_queried": summary.get("total_records", 0),
            "pivot_depth_reached": summary.get("pivot_depth", 0),
            "record_cap": _RECORD_CAP,
            "truncated": len(clean_records) > _RECORD_CAP,
        },
        "executive_summary": summary,
        "pivot_chain": chain,
        "records": clean_records[:_RECORD_CAP],
        "dork_results": dork_results,
        "scrape_results": scrape_results,
    }
    Path(path).write_text(json.dumps(out_data, indent=2, default=_ser), encoding="utf-8")
    print(f"[+] JSON report saved: {path}")
".kpis{display:grid;grid-template-columns:repeat(auto-fit,minmax(160px,1fr));gap:10px;margin:14px 0}" + ".kpi{background:#111;border:1px solid #333;padding:16px;text-align:center}" + ".kpi .n{font-size:30px;font-weight:bold;color:#00ff41}" + ".kpi .l{color:#888;font-size:10px;margin-top:3px}" + ".kpi.warn .n{color:#ff6600} .kpi.crit .n{color:#ff0040}" + ".sec{margin:18px 0} .sec h2{color:#00ff41;border-bottom:1px solid #333;padding-bottom:5px;margin-bottom:10px}" + ".chain{background:#0d1a0d;border:1px solid #1a3a1a;padding:12px;font-size:11px;color:#00cc33;word-break:break-all;margin:8px 0}" + "table{width:100%;border-collapse:collapse} th,td{padding:7px;border:1px solid #222;font-size:11px;word-break:break-all}" + "th{background:#1a1a1a;color:#00ff41;text-transform:uppercase;font-size:10px} td{background:#0d0d0d}" + "tr.c td{background:#1a0005} tr.h td{background:#1a0a00} tr.m td{background:#1a1500}" + ".pw{color:#ff0040;font-weight:bold}" +) + + +def to_html(data: dict, path: str) -> None: + summary = build_exec_summary(data) + chain = render_pivot_chain(data) + target = _clean(data.get("target", "Unknown")) + records = data.get("records", []) + ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S UTC") + + # KPI dashboard + kpis = ( + f'
    {summary["total_records"]}
    TOTAL RECORDS
    ' + f'
    {summary["nodes_discovered"]}
    NODES DISCOVERED
    ' + f'
    {summary["cleartext_passwords"]}
    CLEARTEXT PASSWORDS
    ' + f'
    {summary["hvt_count"]}
    HIGH-VALUE TARGETS
    ' + f'
    {summary["pivot_depth"]}
    PIVOT DEPTH
    ' + f'
    {summary["elapsed"]}
    TOTAL TIME
    ' + ) + + # Severity table + sev_rows = "".join( + f"{lvl}{cnt}" + for lvl, cnt in summary["buckets"].items() if cnt + ) + + # Pivot chain + chain_html = "".join(f'
    {_clean(c)}
    ' for c in chain) + + # Credential rows (top 500, noise-free) + cred_rows = "" + for r in records[:500]: + rs = float(_rget(r, "risk_score") or 0) + cls = "c" if rs >= 90 else "h" if rs >= 70 else "m" if rs >= 40 else "" + em = _clean(_rget(r, "email") or _rget(r, "username")) + pw = _clean(_rget(r, "password")) + src = _clean(_rget(r, "source")) + bd = _clean(_rget(r, "breach_date")) + hvt = " ⚑" if getattr(r, "is_hvt", False) or (isinstance(r, dict) and r.get("is_hvt")) else "" + cred_rows += ( + f"{em}{hvt}" + f"{pw}{src}{bd}{rs:.0f}" + ) + + # Dork results section + dork_results = data.get("dork_results", []) or [] + dork_rows = "" + for h in dork_results: + url = h.get("url", "") + title = h.get("title", "") or h.get("dork", "") + snippet = h.get("snippet", "") + engine = h.get("engine", "") + link = (f'{_clean(url[:80])}' + if url else _clean(title[:80])) + dork_rows += ( + f"{link}{_clean(snippet[:120])}" + f"{_clean(h.get('dork','')[:80])}{_clean(engine)}" + ) + dork_section = ( + f'

    Dork Results ({len(dork_results)} hits)

    ' + f'' + f'{dork_rows if dork_rows else ""}
    URL / TitleSnippetDork QueryEngine
    No dork hits
    ' + ) + + # Scrape results section + scrape_results = data.get("scrape_results", {}) or {} + pastes = scrape_results.get("pastes", []) + creds_sc = scrape_results.get("credentials", []) + tg_hits = scrape_results.get("telegram", []) + mc_hits = scrape_results.get("dork_misconfigs", []) + + paste_rows = "" + for p in pastes: + site = _clean(p.get("site", "")) + pid = p.get("id", "") + pats = _clean(", ".join(f"{k}({len(v)})" for k, v in (p.get("patterns") or {}).items())) + paste_rows += f"{site}{_clean(pid)}{pats}" + + cred_sc_rows = "" + for c in creds_sc: + cred_sc_rows += ( + f"{_clean(c.get('raw','')[:120])}" + f"{_clean(c.get('source',''))}{_clean(c.get('paste_id',''))}" + ) + + tg_rows = "" + for t in tg_hits: + ch = _clean(t.get("channel", "")) + text = _clean(t.get("text", "")[:200]) + pats = _clean(", ".join(f"{k}({len(v)})" for k, v in (t.get("patterns") or {}).items())) + link = f't.me/s/{ch}' + tg_rows += f"{link}{text}{pats}" + + mc_rows = "" + for m in mc_hits: + url_m = m.get("url", "") + title_m = _clean(m.get("title", "")[:80]) + dork_m = _clean(m.get("dork", "")[:80]) + link_m = (f'{_clean(url_m[:80])}' + if url_m else title_m) + mc_rows += f"{link_m}{title_m}{dork_m}" + + scrape_section = ( + f'

    Scrape Results

    ' + f'

    Pastes ({len(pastes)})

    ' + f'' + f'{paste_rows or ""}
    SitePaste IDPatterns
    None
    ' + f'

    Extracted Credentials ({len(creds_sc)})

    ' + f'' + f'{cred_sc_rows or ""}
    Raw CredentialSourcePaste ID
    None
    ' + f'

    Telegram CTI ({len(tg_hits)})

    ' + f'' + f'{tg_rows or ""}
    ChannelMessagePatterns
    None
    ' + f'

    Misconfigurations ({len(mc_hits)})

    ' + f'' + f'{mc_rows or ""}
    URLTitleDork
    None
    ' + f'
    ' + ) + + page = ( + f'' + f'NOX — {target}' + f'

    [ NOX ]

    ' + f'

    Target: {target}  |  {ts}  |  NOX v{_nox_ver()}

    ' + f'

    Executive Summary

    ' + f'
    {kpis}
    ' + f'' + f'{sev_rows}
    SeverityCount
    ' + f'

    Pivot Chain

    {chain_html}
    ' + f'{dork_section}' + f'{scrape_section}' + f'

    Credential Records (top 500)

    ' + f'' + f'{cred_rows}
    IdentityPasswordSourceDateRisk
    ' + f'' + ) + Path(path).write_text(page, encoding="utf-8") + print(f"[+] HTML report saved: {path}") + + +# ── PDF report (fpdf2) ──────────────────────────────────────────────── + +def to_pdf(data: dict, path: str, investigator_id: str = "NOX-AUTO") -> None: + # D1: raise a clear error with install hint if fpdf2 is absent — never silently return. + try: + from fpdf import FPDF # type: ignore + except ImportError: + msg = "[!] fpdf2 not installed — PDF report cannot be generated. Run: pip install fpdf2" + print(msg) + raise RuntimeError(msg) + + summary = build_exec_summary(data) + chain = render_pivot_chain(data) + target = _raw(data.get("target", "Unknown")) + records = data.get("records", []) + ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S UTC") + + class _PDF(FPDF): + def header(self): + self.set_font("Helvetica", "B", 8) + self.set_text_color(120, 120, 120) + self.cell(0, 5, "NOX - FORENSIC INTELLIGENCE REPORT - CONFIDENTIAL", align="R") + self.ln(3) + + def footer(self): + self.set_y(-12) + self.set_font("Helvetica", "", 8) + self.set_text_color(150, 150, 150) + self.cell(0, 5, _pdf_safe(f"Page {self.page_no()} | {target[:50]}"), align="C") + + pdf = _PDF(orientation="P", unit="mm", format="A4") + pdf.set_auto_page_break(auto=True, margin=15) + pdf.set_margins(15, 15, 15) + + # ── Cover page ──────────────────────────────────────────────────── + pdf.add_page() + pdf.set_fill_color(15, 15, 15) + pdf.rect(0, 0, 210, 297, "F") + pdf.set_y(65) + pdf.set_font("Helvetica", "B", 26) + pdf.set_text_color(0, 220, 60) + pdf.cell(0, 12, "FORENSIC INTELLIGENCE REPORT", align="C") + pdf.ln(8) + pdf.set_font("Helvetica", "B", 13) + pdf.set_text_color(200, 200, 200) + pdf.cell(0, 8, _pdf_safe(f"Target: {target}"), align="C") + pdf.ln(6) + pdf.set_font("Helvetica", "", 10) + pdf.set_text_color(140, 140, 140) + for line in [f"Generated: {ts}", f"Investigator: {investigator_id}", + f"Framework: NOX v{_nox_ver()}", "Classification: RESTRICTED"]: + pdf.cell(0, 6, 
_pdf_safe(line), align="C") + pdf.ln(5) + + # ── Executive Summary ───────────────────────────────────────────── + pdf.add_page() + pdf.set_fill_color(255, 255, 255) + pdf.set_text_color(0, 0, 0) + pdf.set_font("Helvetica", "B", 15) + pdf.cell(0, 10, "Executive Summary", ln=True) + pdf.set_draw_color(0, 180, 50) + pdf.set_line_width(0.4) + pdf.line(15, pdf.get_y(), 195, pdf.get_y()) + pdf.ln(4) + + kpis = [ + ("Total Time", summary["elapsed"]), + ("Nodes Discovered", str(summary["nodes_discovered"])), + ("Cleartext Passwords Found", str(summary["cleartext_passwords"])), + ("Pivot Depth", str(summary["pivot_depth"])), + ("Total Records", str(summary["total_records"])), + ("High-Value Targets", str(summary["hvt_count"])), + ] + pdf.set_font("Helvetica", "B", 10) + for label, value in kpis: + pdf.set_fill_color(245, 245, 245) + pdf.cell(95, 7, _pdf_safe(label), border=1, fill=True) + pdf.set_font("Helvetica", "", 10) + pdf.cell(80, 7, _pdf_safe(value), border=1, ln=True) + pdf.set_font("Helvetica", "B", 10) + pdf.ln(4) + + # Severity breakdown + pdf.set_font("Helvetica", "B", 11) + pdf.cell(0, 7, "Severity Breakdown", ln=True) + _sev_c = {"Critical": (220,0,30), "High": (220,100,0), + "Medium": (200,180,0), "Low": (0,150,50), "Info": (100,100,100)} + total_b = max(sum(summary["buckets"].values()), 1) + for level, count in summary["buckets"].items(): + pdf.set_font("Helvetica", "", 9) + pdf.cell(35, 6, _pdf_safe(level), border=1) + pdf.cell(20, 6, str(count), border=1) + bar_w = int(count / total_b * 120) + x, y = pdf.get_x(), pdf.get_y() + pdf.cell(125, 6, "", border=1) + if bar_w: + rc, gc, bc = _sev_c.get(level, (100, 100, 100)) + pdf.set_fill_color(rc, gc, bc) + pdf.rect(x + 1, y + 1, bar_w, 4, "F") + pdf.ln() + + # ── Pivot Chain ─────────────────────────────────────────────────── + pdf.ln(5) + pdf.set_font("Helvetica", "B", 11) + pdf.cell(0, 7, "Pivot Chain Visualization", ln=True) + pdf.line(15, pdf.get_y(), 195, pdf.get_y()) + pdf.ln(3) + 
pdf.set_font("Courier", "", 8) + pdf.set_fill_color(240, 255, 240) + for c_line in chain: + # Word-wrap long chains at 100 chars + for chunk in [c_line[i:i+100] for i in range(0, max(len(c_line), 1), 100)]: + pdf.set_x(15) + pdf.cell(180, 5, _pdf_safe(chunk), border=0, ln=True, fill=True) + pdf.ln(3) + + # ── Credential Findings ─────────────────────────────────────────── + pdf.add_page() + pdf.set_font("Helvetica", "B", 13) + pdf.set_text_color(0, 0, 0) + pdf.cell(0, 9, "Credential Findings", ln=True) + pdf.line(15, pdf.get_y(), 195, pdf.get_y()) + pdf.ln(3) + + cols = [("Identity", 60), ("Password", 45), ("Source", 35), ("Date", 25), ("Risk", 15)] + + def _write_col_headers(): + pdf.set_font("Helvetica", "B", 8) + pdf.set_fill_color(30, 30, 30) + pdf.set_text_color(255, 255, 255) + for col_name, col_w in cols: + pdf.cell(col_w, 6, col_name, border=1, fill=True) + pdf.ln() + pdf.set_text_color(0, 0, 0) + + _write_col_headers() + + for r in records[:500]: + pw = _rget(r, "password") + if not pw and not _rget(r, "email") and not _rget(r, "username"): + continue # skip noise rows with no actionable data + rs = float(_rget(r, "risk_score") or 0) + if rs >= 90: pdf.set_fill_color(255, 220, 220) + elif rs >= 70: pdf.set_fill_color(255, 240, 220) + else: pdf.set_fill_color(255, 255, 255) + pdf.set_font("Helvetica", "", 7) + # Auto page-break with repeated column headers (§5.1) + if pdf.get_y() > pdf.h - 25: + pdf.add_page() + _write_col_headers() + vals = [ + _pdf_safe(_rget(r, "email") or _rget(r, "username"), 38), + _pdf_safe(pw, 28), + _pdf_safe(_rget(r, "source"), 22), + _pdf_safe(_rget(r, "breach_date"), 14), + f"{rs:.0f}", + ] + for val, (_, w) in zip(vals, cols): + pdf.cell(w, 5, val, border=1, fill=True) + pdf.ln() + + # ── Dork Results ───────────────────────────────────────────────── + dork_results = data.get("dork_results", []) or [] + if dork_results: + pdf.add_page() + pdf.set_font("Helvetica", "B", 13) + pdf.set_text_color(0, 0, 0) + pdf.cell(0, 9, 
_pdf_safe(f"Dork Results ({len(dork_results)} hits)"), ln=True) + pdf.line(15, pdf.get_y(), 195, pdf.get_y()) + pdf.ln(3) + pdf.set_font("Helvetica", "B", 8) + pdf.set_fill_color(30, 30, 30); pdf.set_text_color(255, 255, 255) + for col_name, col_w in [("URL / Title", 95), ("Snippet", 55), ("Engine", 30)]: + pdf.cell(col_w, 6, col_name, border=1, fill=True) + pdf.ln(); pdf.set_text_color(0, 0, 0) + for h in dork_results[:200]: + pdf.set_fill_color(245, 245, 255); pdf.set_font("Helvetica", "", 7) + url = _pdf_safe(h.get("url", h.get("title", "")), 65) + snippet = _pdf_safe(h.get("snippet", ""), 38) + engine = _pdf_safe(h.get("engine", ""), 20) + for val, w in zip([url, snippet, engine], [95, 55, 30]): + pdf.cell(w, 5, val, border=1, fill=True) + pdf.ln() + + # ── Scrape Results ──────────────────────────────────────────────── + scrape_results = data.get("scrape_results", {}) or {} + pastes = scrape_results.get("pastes", []) + creds_sc = scrape_results.get("credentials", []) + tg_hits = scrape_results.get("telegram", []) + mc_hits = scrape_results.get("dork_misconfigs", []) + + if pastes or creds_sc or tg_hits or mc_hits: + pdf.add_page() + pdf.set_font("Helvetica", "B", 13) + pdf.set_text_color(0, 0, 0) + pdf.cell(0, 9, "Scrape Results", ln=True) + pdf.line(15, pdf.get_y(), 195, pdf.get_y()) + pdf.ln(3) + + if pastes: + pdf.set_font("Helvetica", "B", 10) + pdf.cell(0, 7, _pdf_safe(f"Pastes ({len(pastes)})"), ln=True) + pdf.set_font("Helvetica", "B", 8) + pdf.set_fill_color(30, 30, 30); pdf.set_text_color(255, 255, 255) + for col_name, col_w in [("Site", 25), ("Paste ID", 80), ("Patterns", 75)]: + pdf.cell(col_w, 6, col_name, border=1, fill=True) + pdf.ln(); pdf.set_text_color(0, 0, 0) + for p in pastes[:100]: + pdf.set_fill_color(245, 245, 245); pdf.set_font("Helvetica", "", 7) + site = _pdf_safe(p.get("site", ""), 15) + pid = _pdf_safe(p.get("id", ""), 55) + pats = _pdf_safe(", ".join(f"{k}({len(v)})" for k, v in (p.get("patterns") or {}).items()), 50) + for val, w 
in zip([site, pid, pats], [25, 80, 75]): + pdf.cell(w, 5, val, border=1, fill=True) + pdf.ln() + pdf.ln(3) + + if creds_sc: + pdf.set_font("Helvetica", "B", 10) + pdf.cell(0, 7, _pdf_safe(f"Extracted Credentials ({len(creds_sc)})"), ln=True) + pdf.set_font("Helvetica", "B", 8) + pdf.set_fill_color(30, 30, 30); pdf.set_text_color(255, 255, 255) + for col_name, col_w in [("Raw Credential", 120), ("Source", 30), ("Paste ID", 30)]: + pdf.cell(col_w, 6, col_name, border=1, fill=True) + pdf.ln(); pdf.set_text_color(0, 0, 0) + for c in creds_sc[:150]: + pdf.set_fill_color(255, 240, 240); pdf.set_font("Helvetica", "", 7) + raw = _pdf_safe(c.get("raw", ""), 80) + src = _pdf_safe(c.get("source", ""), 20) + pid = _pdf_safe(c.get("paste_id", ""), 20) + for val, w in zip([raw, src, pid], [120, 30, 30]): + pdf.cell(w, 5, val, border=1, fill=True) + pdf.ln() + pdf.ln(3) + + if tg_hits: + pdf.set_font("Helvetica", "B", 10) + pdf.cell(0, 7, _pdf_safe(f"Telegram CTI ({len(tg_hits)})"), ln=True) + pdf.set_font("Helvetica", "B", 8) + pdf.set_fill_color(30, 30, 30); pdf.set_text_color(255, 255, 255) + for col_name, col_w in [("Channel", 50), ("Message Excerpt", 100), ("Patterns", 30)]: + pdf.cell(col_w, 6, col_name, border=1, fill=True) + pdf.ln(); pdf.set_text_color(0, 0, 0) + for t in tg_hits[:80]: + pdf.set_fill_color(245, 245, 255); pdf.set_font("Helvetica", "", 7) + link = _pdf_safe(f"t.me/s/{t.get('channel','')}", 35) + text = _pdf_safe(t.get("text", ""), 70) + pats = _pdf_safe(", ".join(f"{k}({len(v)})" for k, v in (t.get("patterns") or {}).items()), 25) + for val, w in zip([link, text, pats], [50, 100, 30]): + pdf.cell(w, 5, val, border=1, fill=True) + pdf.ln() + pdf.ln(3) + + if mc_hits: + pdf.set_font("Helvetica", "B", 10) + pdf.cell(0, 7, _pdf_safe(f"Misconfigurations ({len(mc_hits)})"), ln=True) + pdf.set_font("Helvetica", "B", 8) + pdf.set_fill_color(30, 30, 30); pdf.set_text_color(255, 255, 255) + for col_name, col_w in [("URL", 90), ("Title", 60), ("Dork", 30)]: + 
"""
sources/helpers/scanner.py
Recursive Avalanche Engine for NOX autoscan.

Pipeline per asset (sequential phases):
  Phase 1 — Breach scan
  Phase 2 — Hash crack (non-blocking, on breach results)
  Phase 3 — Dork
  Phase 4 — Scrape
  → Harvest new identifiers from all phases
  → Reinject every new unique identifier (not seen before) recursively
"""

import asyncio
import logging
import re
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple

if TYPE_CHECKING:
    from nox import Orchestrator

_syslog = logging.getLogger("nox.system")

_EMAIL_RE = re.compile(r"[\w.+-]+@[\w-]+\.[\w.]+")
_USERNAME_RE = re.compile(r"(?:github\.com|twitter\.com|linkedin\.com/in|reddit\.com/u)/([A-Za-z0-9_.-]{3,39})", re.I)
_PHONE_RE = re.compile(r"\+\d[\d\s.\-()]{7,14}\d|\b\d{3}[\s.\-]\d{3}[\s.\-]\d{4}\b")
_NAME_RE = re.compile(r"\b([A-Z][a-z]{1,20}(?:\s+[A-Z][a-z]{1,20}){1,3})\b")

_DORK_LIMIT = 20
_PIVOT_TYPES = {"email", "username", "phone", "name", "ip", "domain"}


def _cfg_int(orc, attr: str, cfg_const: str, default: int) -> int:
    """Resolve an integer setting.

    Lookup order: ``orc.config.<attr>`` (when set and not None), then the
    ``nox.Cfg.<cfg_const>`` class constant, then ``default`` when ``nox`` (or
    the constant) is unavailable.
    """
    cfg = getattr(orc, "config", None) if orc is not None else None
    value = getattr(cfg, attr, None) if cfg is not None else None
    if value is not None:
        return int(value)
    try:
        from nox import Cfg  # type: ignore
        return getattr(Cfg, cfg_const)
    except (ImportError, AttributeError):
        return default


def _cfg_depth(orc=None) -> int:
    """Return the maximum pivot depth for this run (fallback: 2)."""
    # A7/A10: read from orchestrator config if available
    return _cfg_int(orc, "pivot_depth", "PIVOT_DEPTH", 2)


def _cfg_concurrency(orc=None) -> int:
    """Return the concurrency limit for this run (fallback: 15)."""
    # A7: read from orchestrator config if available
    return _cfg_int(orc, "concurrency", "CONCURRENCY", 15)


def _out(level: str, msg: str) -> None:
    """Emit a console message through ``nox.out``; fall back to stderr.

    Deliberately best-effort: a broken or missing console helper must never
    abort a scan, so any failure degrades to a plain stderr print.
    """
    try:
        from nox import out as _nox_out  # type: ignore
        _nox_out(level, msg)
    except Exception:
        import sys
        print(f"[{level}] {msg}", file=sys.stderr)


def _extract_ids_from_text(text: str, exclude: str = "") -> List[Tuple[str, str]]:
    """Extract pivotable identifiers from free text, excluding the current asset.

    Returns ``(value, qtype)`` pairs in the order emails, usernames, phones,
    names.  Emails and usernames are lower-cased, phones are reduced to their
    digits (plus a leading ``+`` if present), names keep their casing.
    Duplicates are not removed here.  ``None`` or non-string ``text`` yields
    an empty list.
    """
    if not text or not isinstance(text, str):
        return []
    excl = (exclude or "").strip().lower()
    found: List[Tuple[str, str]] = []

    for m in _EMAIL_RE.findall(text):
        # The pattern's trailing class accepts dots, so an address at the end
        # of a sentence ("a@b.com.") would otherwise become a distinct asset.
        v = m.lower().rstrip(".")
        if v != excl:
            found.append((v, "email"))

    for m in _USERNAME_RE.findall(text):
        v = m.lower()
        if v != excl:
            found.append((v, "username"))

    for m in _PHONE_RE.findall(text):
        clean = re.sub(r"[\s.\-()]", "", m)
        if 8 <= len(clean) <= 15 and clean != excl:
            found.append((clean, "phone"))

    for m in _NAME_RE.findall(text):
        if len(m.split()) >= 2 and m.lower() != excl:
            found.append((m, "name"))

    return found


def _ids_from_records(records: list, exclude: str = "") -> List[Tuple[str, str, str]]:
    """
    Extract pivotable identifiers from breach records.
    Returns (value, qtype, ref) where ref is the source/breach name for logging.

    Record fields are read with ``getattr``; values are stripped before both
    the length check and the comparison with ``exclude``, and non-string
    values are skipped.  Extra addresses listed under ``metadata["emails"]``
    are returned lower-cased.
    """
    fields = (
        ("email", "email"),
        ("username", "username"),
        ("phone", "phone"),
        ("full_name", "name"),
        ("ip_address", "ip"),
        ("domain", "domain"),
    )
    excl = (exclude or "").strip().lower()
    found: List[Tuple[str, str, str]] = []

    for r in records or []:
        src = getattr(r, "source", "") or ""
        breach = getattr(r, "breach_name", "") or src

        for attr, qtype in fields:
            val = getattr(r, attr, "")
            if not isinstance(val, str):
                continue
            val = val.strip()
            if len(val) > 2 and val.lower() != excl:
                found.append((val, qtype, breach))

        meta = getattr(r, "metadata", None)
        emails = meta.get("emails") if isinstance(meta, dict) else None
        for em in emails or []:
            if isinstance(em, str) and em and em.lower() != excl:
                found.append((em.lower(), "email", breach))

    return found
+ """ + found: List[Tuple[str, str, str]] = [] + excl = exclude.lower() + for r in records: + src = getattr(r, "source", "") or "" + breach = getattr(r, "breach_name", "") or src + for val, qtype in [ + (getattr(r, "email", ""), "email"), + (getattr(r, "username", ""), "username"), + (getattr(r, "phone", ""), "phone"), + (getattr(r, "full_name", ""), "name"), + (getattr(r, "ip_address", ""), "ip"), + (getattr(r, "domain", ""), "domain"), + ]: + if val and len(val) > 2 and val.lower() != excl: + found.append((val.strip(), qtype, breach)) + meta = getattr(r, "metadata", {}) or {} + for em in meta.get("emails", []): + if em and em.lower() != excl: + found.append((em.lower(), "email", breach)) + return found + + +# ── Pivot log entry schema ───────────────────────────────────────────────── +# { +# "asset": str, # identifier scanned +# "qtype": str, # email/username/phone/name/domain/ip +# "depth": int, # 0=seed, 1=first pivot, … +# "parent": str|None, # asset that discovered this one +# "found_in": str, # phase that found this asset: seed/breach/dork/scrape/hash_crack +# "records": int, # breach records found for this asset +# "dorks": int, # dork hits found for this asset +# "scrape": int, # scrape items found for this asset +# "children": List[dict], # [{asset, qtype, found_in, ref}] — new assets discovered +# "cracked": List[str], # plaintexts cracked from hashes in breach results +# } + + +class AvalancheScanner: + def __init__(self, orchestrator: "Orchestrator") -> None: + self._orc = orchestrator + self.seen_assets: Set[str] = set() + # A2: single semaphore for the entire run, created lazily inside the event loop + self._sem: Optional[asyncio.Semaphore] = None + self._all_records: List = [] + self._dork_hits: List[dict] = [] + self._seen_dork_urls: Set[str] = set() + # A6: scrape_hits merged atomically per _do_process call + self._scrape_hits: Dict = {"pastes": [], "credentials": [], "hashes": [], + "telegram": [], "dork_misconfigs": []} + self._max_depth: int = 
0 + self._in_flight: Dict[str, asyncio.Future] = {} + self.pivot_log: List[dict] = [] + # A8: global set to prevent duplicate entries in discovered_assets + self._seen_discovered: Set[str] = set() + self.discovered_assets: List[dict] = [] + + def _get_sem(self) -> asyncio.Semaphore: + # A2: semaphore created once per run, shared across all coroutines + if self._sem is None: + self._sem = asyncio.Semaphore(_cfg_concurrency(self._orc)) + return self._sem + + async def run(self, target: str) -> tuple: + # A9: respect no_pivot flag from config + cfg = getattr(self._orc, "config", None) + no_pivot = getattr(cfg, "no_pivot", False) if cfg else False + if no_pivot: + try: + from nox import Detect # type: ignore + qtype = Detect.qtype(target) + except ImportError: + qtype = "email" + async with self._get_sem(): + try: + records = await self._orc._full_async_scan(target, qtype) + except Exception: + records = [] + self._all_records.extend(records) + self.seen_assets.add(target.lower().strip()) + self.pivot_log.append({ + "asset": target, "qtype": qtype, "depth": 0, "parent": None, + "found_in": "seed", "records": len(records), "dorks": 0, + "scrape": 0, "children": [], "cracked": [], + }) + return self._all_records, self._dork_hits, self._scrape_hits + await self._process(target, depth=0, parent=None, found_in="seed") + return self._all_records, self._dork_hits, self._scrape_hits + + def get_discovered_assets(self) -> List[dict]: + """Return flat list of all discovered assets with full provenance.""" + return self.discovered_assets + + def get_max_depth(self) -> int: + return self._max_depth + + # ── Dedup gate ──────────────────────────────────────────────────── + + async def _process(self, asset: str, depth: int, + parent: Optional[str], found_in: str) -> None: + """Dedup gate: ensures each asset is processed exactly once.""" + # A10: use per-run depth from orchestrator config + if depth > _cfg_depth(self._orc): + _syslog.debug("avalanche depth cap reached for %s", asset) 
+ return + + key = asset.lower().strip() + if not key: + return + + # A1: add to seen_assets FIRST (atomic gate) before any other check. + # If already present, wait on the in-flight future if one exists, then return. + if key in self.seen_assets: + if key in self._in_flight: + try: + await self._in_flight[key] + except Exception: + pass + return + + self.seen_assets.add(key) + + # If already in-flight (shouldn't happen after the seen_assets check above, + # but guard defensively), wait and return. + if key in self._in_flight: + try: + await self._in_flight[key] + except Exception: + pass + return + + loop = asyncio.get_running_loop() + fut: asyncio.Future = loop.create_future() + self._in_flight[key] = fut + + try: + await self._do_process(asset, depth, parent, found_in) + finally: + if not fut.done(): + fut.set_result(None) + + # ── Core pipeline ───────────────────────────────────────────────── + + async def _do_process(self, asset: str, depth: int, + parent: Optional[str], found_in: str) -> None: + """ + Sequential pipeline: + Phase 1 — Breach scan + Phase 2 — Hash crack (concurrent, non-blocking) + Phase 3 — Dork + Phase 4 — Scrape + → Harvest all new identifiers with phase+ref annotation + → Reinject every unseen identifier + """ + if depth > self._max_depth: + self._max_depth = depth + + try: + from nox import Detect # type: ignore + qtype = Detect.qtype(asset) + except ImportError: + qtype = "email" + + indent = " " * depth + _out("pivot" if depth > 0 else "info", + f"{indent}[depth={depth}] {'↳' if depth > 0 else '◉'} {asset} ({qtype})" + + (f" ← {found_in} via {parent}" if parent else " [SEED]")) + _syslog.info("AVALANCHE asset=%s depth=%d parent=%s found_in=%s", + asset, depth, parent or "—", found_in) + + # ── Phase 1: Breach scan ────────────────────────────────────── + async with self._get_sem(): + try: + records: List = await self._orc._full_async_scan(asset, qtype) + except Exception as exc: + _syslog.warning("BREACH_FAIL asset=%s err=%s", asset, 
exc) + records = [] + + _out("ok" if records else "dim", + f"{indent} [breach] {len(records)} records") + _syslog.info("BREACH_DONE asset=%s records=%d", asset, len(records)) + self._all_records.extend(records) + + # ── Phase 2: Hash crack (non-blocking) ──────────────────────── + cracked_plaintexts: List[str] = [] + try: + from sources.helpers.cracker import detect_hash # type: ignore + import aiohttp as _aio # type: ignore + async with _aio.ClientSession(connector=_aio.TCPConnector(limit=5)) as _cs: + crack_tasks = [ + _crack_and_inject(_cs, getattr(r, "password_hash", ""), r, + self.seen_assets, self._all_records, + self, depth, asset, cracked_plaintexts) + for r in records + if getattr(r, "password_hash", "") and not getattr(r, "password", "") + and detect_hash(getattr(r, "password_hash", "")) + ] + if crack_tasks: + await asyncio.gather(*crack_tasks, return_exceptions=True) + except ImportError: + pass + + # ── Phase 3: Dork ───────────────────────────────────────────── + _out("info", f"{indent} [dork] querying for {asset}…") + try: + dork_res = await self._async_dork(asset, qtype) + except Exception as exc: + _syslog.warning("DORK_FAIL asset=%s err=%s", asset, exc) + dork_res = [] + + dork_count = 0 + for hit in (dork_res or [])[:_DORK_LIMIT]: + url = hit.get("url", "") or hit.get("title", "") + if url and url not in self._seen_dork_urls: + self._seen_dork_urls.add(url) + hit["pivot_asset"] = asset + hit["pivot_depth"] = depth + self._dork_hits.append(hit) + dork_count += 1 + _out("ok" if dork_count else "dim", + f"{indent} [dork] {dork_count} hits") + _syslog.info("DORK_DONE asset=%s hits=%d", asset, dork_count) + + # ── Phase 4: Scrape ─────────────────────────────────────────── + _out("info", f"{indent} [scrape] querying for {asset}…") + try: + scrape_res = await self._async_scrape(asset) + except Exception as exc: + _syslog.warning("SCRAPE_FAIL asset=%s err=%s", asset, exc) + scrape_res = {} + + # A6: collect scrape results locally, then merge atomically 
+ scrape_count = 0 + local_scrape: Dict = {k: [] for k in self._scrape_hits} + for k in self._scrape_hits: + for item in (scrape_res or {}).get(k, []): + if isinstance(item, dict): + item["pivot_asset"] = asset + item["pivot_depth"] = depth + local_scrape[k].append(item) + scrape_count += 1 + # Atomic merge into shared dict (single-threaded event loop — safe) + for k, items in local_scrape.items(): + self._scrape_hits[k].extend(items) + _out("ok" if scrape_count else "dim", + f"{indent} [scrape] {scrape_count} items") + _syslog.info("SCRAPE_DONE asset=%s items=%d", asset, scrape_count) + + # ── Harvest new identifiers with phase+ref annotation ───────── + # Each entry: (value, qtype, found_in_phase, ref) + new_ids: List[Tuple[str, str, str, str]] = [] + + # From breach records + for val, vqtype, ref in _ids_from_records(records, exclude=asset): + if vqtype in _PIVOT_TYPES: + new_ids.append((val, vqtype, "breach", ref)) + + # From dork hits + for hit in (dork_res or [])[:_DORK_LIMIT]: + url = hit.get("url", "") + dork = hit.get("dork", "") + ref = url or dork + text = f"{hit.get('title','')} {hit.get('snippet','')} {url} {dork}" + for val, vqtype in _extract_ids_from_text(text, exclude=asset): + if vqtype in _PIVOT_TYPES: + new_ids.append((val, vqtype, "dork", ref[:120])) + + # From scrape results + for cred in (scrape_res or {}).get("credentials", []): + raw = cred.get("raw", "") + ref = f"paste:{cred.get('paste_id','')}" or cred.get("source", "scrape") + for val, vqtype in _extract_ids_from_text(raw, exclude=asset): + if vqtype in _PIVOT_TYPES: + new_ids.append((val, vqtype, "scrape", ref)) + for tg in (scrape_res or {}).get("telegram", []): + ref = f"t.me/{tg.get('channel','')}" + for val, vqtype in _extract_ids_from_text(tg.get("text", ""), exclude=asset): + if vqtype in _PIVOT_TYPES: + new_ids.append((val, vqtype, "scrape", ref)) + for mc in (scrape_res or {}).get("dork_misconfigs", []): + ref = mc.get("url", mc.get("title", "misconfig")) + for val, vqtype in 
# ── Dork dispatcher ───────────────────────────────────────────────

async def _async_dork(self, asset: str, qtype: str = "email") -> list:
    """Run the dorking engine for *asset* and return a list of hit dicts.

    Instance method of the scanner; ``self`` must expose ``_orc``.

    Preferred path: open a short-lived aiohttp session and await
    ``self._orc.dorking_engine.async_search``. When aiohttp cannot be
    imported, the synchronous ``self._orc.dork`` is run in the default
    executor instead.

    Args:
        asset: Identifier to search for.
        qtype: Query type forwarded to the async engine. The synchronous
            fallback is called with *asset* only.

    Returns:
        On the async path, one dict per record with the keys ``url``,
        ``title``, ``snippet``, ``dork`` and ``engine``. On the fallback
        path, whatever list ``self._orc.dork`` returns. ``[]`` on any error.
    """
    try:
        import aiohttp as _aio  # type: ignore
        import ssl as _ssl
        connector = _aio.TCPConnector(
            limit=10, ssl=_ssl.create_default_context(), family=0)
        async with _aio.ClientSession(connector=connector) as session:
            recs = await self._orc.dorking_engine.async_search(session, asset, qtype)
        hits = []
        for r in recs or []:
            # A record without a usable ``raw_data`` mapping yields empty
            # fields instead of aborting the whole batch.
            raw = getattr(r, "raw_data", None) or {}
            hits.append({
                "url": raw.get("url", ""),
                # The title mirrors the URL; the dork string is used only
                # when the record carries no "url" key at all.
                "title": raw.get("url", raw.get("dork", "")),
                "snippet": "",
                "dork": raw.get("dork", ""),
                "engine": "DDG",
            })
        return hits
    except ImportError:
        # aiohttp is unavailable: run the blocking dork off the event loop.
        # This path is guarded as well so the method keeps its
        # "never raises, returns [] on failure" behaviour.
        try:
            loop = asyncio.get_running_loop()
            result = await loop.run_in_executor(None, self._orc.dork, asset)
        except Exception as exc:
            _syslog.debug("DORK_ERR asset=%s err=%s", asset, exc)
            return []
        return result if isinstance(result, list) else []
    except Exception as exc:
        _syslog.debug("DORK_ERR asset=%s err=%s", asset, exc)
        return []


# ── Scrape dispatcher ─────────────────────────────────────────────

async def _async_scrape(self, asset: str) -> dict:
    """Scrape sources for *asset* in a worker thread and return the result dict.

    Instance method of the scanner; ``self`` must expose ``_orc``.

    A fresh ``Session`` and ``ScrapeEngine`` are built for every call. If
    that setup or the engine run fails, the orchestrator's own
    ``self._orc.scrape`` is used as a fallback.

    Args:
        asset: Identifier to scrape for.

    Returns:
        The dict produced by the scrape. When the result is not a dict, or
        both paths fail, a dict with empty ``pastes``, ``credentials``,
        ``hashes``, ``telegram`` and ``dork_misconfigs`` lists.
    """
    # A3: instantiate a fresh Session + ScrapeEngine per call to avoid sharing
    # a non-thread-safe requests.Session / cloudscraper across concurrent coroutines.
    _empty: dict = {"pastes": [], "credentials": [], "hashes": [],
                    "telegram": [], "dork_misconfigs": []}
    try:
        loop = asyncio.get_running_loop()
        try:
            from nox import Session, NoxConfig, ScrapeEngine  # type: ignore
            _cfg = getattr(self._orc, "config", None) or NoxConfig()
            _session = Session(_cfg)
            _engine = ScrapeEngine(_session, self._orc.db)
            qtype = "email"
            try:
                from nox import Detect  # type: ignore
                qtype = Detect.qtype(asset)
            except Exception:
                # Best effort: keep the "email" default when detection fails.
                pass
            result = await loop.run_in_executor(None, _engine.run, asset, qtype)
        except Exception as exc:
            # Record why the isolated engine was abandoned before falling
            # back; previously this failure was swallowed without a trace.
            # NOTE(review): the fallback goes through the shared orchestrator
            # that A3 is meant to avoid — confirm this is acceptable.
            _syslog.debug("SCRAPE_FALLBACK asset=%s err=%s", asset, exc)
            result = await loop.run_in_executor(None, self._orc.scrape, asset)
        return result if isinstance(result, dict) else _empty
    except Exception as exc:
        _syslog.debug("SCRAPE_ERR asset=%s err=%s", asset, exc)
        return _empty


# ── Hash crack helper ──────────────────────────────────────────────────────

async def _crack_and_inject(session, hash_value: str, record_ref,
                            seen_assets: Set[str], all_records: list,
                            scanner: "AvalancheScanner",
                            depth: int, parent_asset: str,
                            cracked_out: List[str]) -> None:
    """Try to crack *hash_value*; on success annotate the record and pivot.

    Args:
        session: Passed through to ``async_crack``.
        hash_value: Hash string to crack.
        record_ref: Record updated in place on success (``password``,
            ``hash_type`` and ``data_types`` are written).
        seen_assets: Keys of assets that were already processed.
        all_records: Unused here; kept for interface compatibility.
        scanner: Scanner whose ``_process`` is awaited for the plaintext.
        depth: Depth of the asset the hash was found on.
        parent_asset: Asset the hash was found on.
        cracked_out: Output list; the plaintext is appended on success.

    Returns:
        None. Failures (unknown hash type, timeout, crack error, empty
        result) are logged at debug level and end the call early.
    """
    from sources.helpers.cracker import detect_hash, async_crack, CRACK_TIMEOUT  # type: ignore
    hash_type = detect_hash(hash_value)
    if not hash_type:
        return
    try:
        plaintext = await asyncio.wait_for(
            async_crack(session, hash_value, hash_type), timeout=CRACK_TIMEOUT)
    except asyncio.TimeoutError:
        # A timeout carries no message, so name the reason explicitly.
        _syslog.debug("CRACK_FAIL hash=%s reason=%s", hash_value[:16], "timeout")
        return
    except Exception as exc:
        _syslog.debug("CRACK_FAIL hash=%s reason=%s", hash_value[:16], exc)
        return

    if not plaintext:
        _syslog.debug("CRACK_FAIL hash=%s reason=no_result", hash_value[:16])
        return

    record_ref.password = plaintext
    record_ref.hash_type = hash_type
    # ``data_types`` may be None; treat that as an empty list both when
    # testing for the tag and when appending it.
    existing_types = list(record_ref.data_types or [])
    if "Cracked" not in existing_types:
        record_ref.data_types = existing_types + ["Cracked"]
    # NOTE(review): this writes the recovered plaintext to the system log —
    # confirm that is intended before shipping.
    _syslog.info("CRACK_OK hash=%s plain=%s parent=%s", hash_value[:16], plaintext, parent_asset)
    _out("ok", f" [crack] {hash_value[:16]}… → {plaintext} (from {parent_asset})")
    cracked_out.append(plaintext)

    # A4: inject cracked plaintext as qtype="password" — NOT as username.
    # Only pivot on it if sources support password-recycling queries.
    # NOTE(review): no qtype is passed below; presumably ``_process``
    # determines it itself — confirm.
    # The key is normalised with lower() + strip(), the same way the
    # scanner normalises child assets before checking ``seen_assets``.
    key = plaintext.lower().strip()
    if key and key not in seen_assets and depth + 1 <= _cfg_depth(scanner._orc):
        await scanner._process(plaintext, depth + 1,
                               parent=parent_asset, found_in="hash_crack")
200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/hunter_io.json b/sources/hunter_io.json new file mode 100644 index 0000000..9c44393 --- /dev/null +++ b/sources/hunter_io.json @@ -0,0 +1,30 @@ +{ + "name": "hunter_io", + "category": "discovery", + "endpoint": "https://api.hunter.io/v2/domain-search?domain={target}&api_key={HUNTER_API_KEY}", + "method": "GET", + "requires_auth": true, + "selectors": { + "emails": "$.data.emails[*].value" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{HUNTER_API_KEY}" + ], + "input_type": "domain", + "output_type": [ + "email" + ], + "normalization_map": { + "value": "email_address" + }, + "tags": [ + "passive" + ], + "health_check_url": "https://api.hunter.io", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/hunter_verify.json b/sources/hunter_verify.json new file mode 100644 index 0000000..70960b6 --- /dev/null +++ b/sources/hunter_verify.json @@ -0,0 +1,29 @@ +{ + "name": "hunter_verify", + "category": "email_rep", + "endpoint": "https://api.hunter.io/v2/email-verifier?email={target}&api_key={HUNTER_API_KEY}", + "method": "GET", + "requires_auth": true, + "selectors": { + "result": "$.data.result" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{HUNTER_API_KEY}" + ], + "input_type": "email", + "output_type": [ + "email" + ], + "normalization_map": {}, + "tags": [ + "passive", + "fast" + ], + "health_check_url": "https://api.hunter.io", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/hybrid_analysis.json b/sources/hybrid_analysis.json new file mode 100644 index 0000000..859e99e --- /dev/null +++ b/sources/hybrid_analysis.json @@ -0,0 +1,37 @@ +{ + "name": "hybrid_analysis", + "category": "threat_intel", + 
"endpoint": "https://www.hybrid-analysis.com/api/v2/search/hash", + "method": "POST", + "requires_auth": true, + "selectors": { + "verdict": "$.verdict" + }, + "rate_limit": 1.0, + "headers": { + "api-key": "{HYBRID_API_KEY}" + }, + "payload_template": { + "hash": "{target}" + }, + "api_key_slots": [ + "{HYBRID_API_KEY}" + ], + "input_type": "hash", + "output_type": [ + "hash" + ], + "normalization_map": { + "verdict": "malware_verdict" + }, + "tags": [ + "passive", + "threat", + "heavy" + ], + "health_check_url": "https://www.hybrid-analysis.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/intelx_phone.json b/sources/intelx_phone.json new file mode 100644 index 0000000..36488da --- /dev/null +++ b/sources/intelx_phone.json @@ -0,0 +1,30 @@ +{ + "name": "intelx_phone", + "category": "breaches", + "endpoint": "https://2.intelx.io/phone/search?phone={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "results": "$.results" + }, + "rate_limit": 1.0, + "headers": { + "x-key": "{INTELX_API_KEY}" + }, + "api_key_slots": [ + "{INTELX_API_KEY}" + ], + "input_type": "phone", + "output_type": [ + "phone" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://2.intelx.io", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/intelx_search.json b/sources/intelx_search.json new file mode 100644 index 0000000..b64d04a --- /dev/null +++ b/sources/intelx_search.json @@ -0,0 +1,44 @@ +{ + "name": "intelx_search", + "category": "breaches", + "endpoint": "https://2.intelx.io/intelligent/search", + "method": "POST", + "requires_auth": true, + "selectors": { + "id": "$.id" + }, + "rate_limit": 1.0, + "headers": { + "x-key": "{INTELX_API_KEY}" + }, + "payload_template": { + "term": "{target}", + "buckets": [], + "lookuplevel": 
0, + "maxresults": 100, + "timeout": 0, + "datefrom": "", + "dateto": "", + "sort": 4, + "media": 0, + "terminate": [] + }, + "api_key_slots": [ + "{INTELX_API_KEY}" + ], + "input_type": "email", + "output_type": [ + "email", + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive", + "stealth" + ], + "health_check_url": "https://2.intelx.io", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/intezer.json b/sources/intezer.json new file mode 100644 index 0000000..a70d1c2 --- /dev/null +++ b/sources/intezer.json @@ -0,0 +1,31 @@ +{ + "name": "intezer", + "category": "threat_intel", + "endpoint": "https://analyze.intezer.com/api/v2-0/get-analysis-by-hash/{target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "result": "$.result" + }, + "rate_limit": 1.0, + "headers": { + "Authorization": "Bearer {INTEZER_API_KEY}" + }, + "api_key_slots": [ + "{INTEZER_API_KEY}" + ], + "input_type": "hash", + "output_type": [ + "hash" + ], + "normalization_map": {}, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://analyze.intezer.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/ipapi_co.json b/sources/ipapi_co.json new file mode 100644 index 0000000..a34d25c --- /dev/null +++ b/sources/ipapi_co.json @@ -0,0 +1,33 @@ +{ + "name": "ipapi_co", + "category": "geolocation", + "endpoint": "https://ipapi.co/{target}/json/", + "method": "GET", + "requires_auth": false, + "selectors": { + "asn": "$.asn", + "org": "$.org" + }, + "rate_limit": 1.0, + "headers": { + "User-Agent": "Mozilla/5.0" + }, + "api_key_slots": [], + "input_type": "ip", + "output_type": [ + "domain" + ], + "normalization_map": { + "asn": "asn_number", + "org": "asn_org" + }, + "tags": [ + "passive", + "fast" + ], + "health_check_url": "https://ipapi.co", + 
"expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/ipdata_co.json b/sources/ipdata_co.json new file mode 100644 index 0000000..0413681 --- /dev/null +++ b/sources/ipdata_co.json @@ -0,0 +1,31 @@ +{ + "name": "ipdata_co", + "category": "geolocation", + "endpoint": "https://api.ipdata.co/{target}?api-key={IPDATA_API_KEY}", + "method": "GET", + "requires_auth": true, + "selectors": { + "threat": "$.threat" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{IPDATA_API_KEY}" + ], + "input_type": "ip", + "output_type": [ + "ip" + ], + "normalization_map": { + "threat": "threat_info" + }, + "tags": [ + "passive", + "fast" + ], + "health_check_url": "https://api.ipdata.co", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/ipgeolocation_io.json b/sources/ipgeolocation_io.json new file mode 100644 index 0000000..223ceec --- /dev/null +++ b/sources/ipgeolocation_io.json @@ -0,0 +1,31 @@ +{ + "name": "ipgeolocation_io", + "category": "geolocation", + "endpoint": "https://api.ipgeolocation.io/ipgeo?apiKey={IPGEO_API_KEY}&ip={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "isp": "$.isp" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{IPGEO_API_KEY}" + ], + "input_type": "ip", + "output_type": [ + "ip" + ], + "normalization_map": { + "isp": "asn_org" + }, + "tags": [ + "passive", + "fast" + ], + "health_check_url": "https://api.ipgeolocation.io", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/ipinfo_io.json b/sources/ipinfo_io.json new file mode 100644 index 0000000..83965d7 --- /dev/null +++ b/sources/ipinfo_io.json @@ -0,0 +1,31 @@ +{ + "name": "ipinfo_io", + "category": "geolocation", + "endpoint": 
"https://ipinfo.io/{target}/json", + "method": "GET", + "requires_auth": false, + "selectors": { + "org": "$.org", + "city": "$.city" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "ip", + "output_type": [ + "domain" + ], + "normalization_map": { + "org": "asn_org", + "city": "geo_city" + }, + "tags": [ + "passive", + "fast" + ], + "health_check_url": "https://ipinfo.io", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/ipinfodb.json b/sources/ipinfodb.json new file mode 100644 index 0000000..e109b81 --- /dev/null +++ b/sources/ipinfodb.json @@ -0,0 +1,30 @@ +{ + "name": "ipinfodb", + "category": "geolocation", + "endpoint": "http://api.ipinfodb.com/v3/ip-city/?key={IPINFODB_API_KEY}&ip={target}&format=json", + "method": "GET", + "requires_auth": true, + "selectors": { + "city": "$.cityName" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{IPINFODB_API_KEY}" + ], + "input_type": "ip", + "output_type": [ + "ip" + ], + "normalization_map": { + "cityName": "geo_city" + }, + "tags": [ + "passive" + ], + "health_check_url": "http://api.ipinfodb.com", + "expected_status": 200, + "reliability_score": 3, + "backup_endpoints": [], + "confidence": 0.7 +} \ No newline at end of file diff --git a/sources/ipqualityscore_email.json b/sources/ipqualityscore_email.json new file mode 100644 index 0000000..c973079 --- /dev/null +++ b/sources/ipqualityscore_email.json @@ -0,0 +1,31 @@ +{ + "name": "ipqualityscore_email", + "category": "email_rep", + "endpoint": "https://ipqualityscore.com/api/json/email/{IPQS_API_KEY}/{target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "fraud_score": "$.fraud_score" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{IPQS_API_KEY}" + ], + "input_type": "email", + "output_type": [ + "email" + ], + "normalization_map": { + "fraud_score": "email_fraud_score" + }, 
+ "tags": [ + "passive", + "fast" + ], + "health_check_url": "https://ipqualityscore.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/ipstack.json b/sources/ipstack.json new file mode 100644 index 0000000..28928a0 --- /dev/null +++ b/sources/ipstack.json @@ -0,0 +1,31 @@ +{ + "name": "ipstack", + "category": "geolocation", + "endpoint": "http://api.ipstack.com/{target}?access_key={IPSTACK_API_KEY}", + "method": "GET", + "requires_auth": true, + "selectors": { + "country": "$.country_name" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{IPSTACK_API_KEY}" + ], + "input_type": "ip", + "output_type": [ + "ip" + ], + "normalization_map": { + "country_name": "geo_country" + }, + "tags": [ + "passive", + "fast" + ], + "health_check_url": "http://api.ipstack.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/ipvigilante.json b/sources/ipvigilante.json new file mode 100644 index 0000000..8af6fcb --- /dev/null +++ b/sources/ipvigilante.json @@ -0,0 +1,27 @@ +{ + "name": "ipvigilante", + "category": "geolocation", + "endpoint": "https://ipvigilante.com/json/{target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "city": "$.data.city_name" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "ip", + "output_type": [ + "ip" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://ipvigilante.com", + "expected_status": 200, + "reliability_score": 3, + "is_volatile": true, + "backup_endpoints": [], + "confidence": 0.7 +} \ No newline at end of file diff --git a/sources/joesandbox.json b/sources/joesandbox.json new file mode 100644 index 0000000..ce3c4db --- /dev/null +++ b/sources/joesandbox.json @@ -0,0 +1,32 @@ +{ + "name": "joesandbox", + "category": "threat_intel", 
+ "endpoint": "https://www.joesandbox.com/api/v2/analysis/search?q={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "id": "$.[*].id" + }, + "rate_limit": 1.0, + "headers": { + "X-JoeSandbox-Api-Key": "{JOE_API_KEY}" + }, + "api_key_slots": [ + "{JOE_API_KEY}" + ], + "input_type": "hash", + "output_type": [ + "hash" + ], + "normalization_map": {}, + "tags": [ + "passive", + "threat", + "heavy" + ], + "health_check_url": "https://www.joesandbox.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/keybase_lookup.json b/sources/keybase_lookup.json new file mode 100644 index 0000000..fa5af42 --- /dev/null +++ b/sources/keybase_lookup.json @@ -0,0 +1,26 @@ +{ + "name": "keybase_lookup", + "category": "social", + "endpoint": "https://keybase.io/_/api/1.0/user/lookup.json?username={target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "id": "$.them[0].id" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "username", + "output_type": [ + "username" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://keybase.io", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/keybase_proofs.json b/sources/keybase_proofs.json new file mode 100644 index 0000000..5e2460e --- /dev/null +++ b/sources/keybase_proofs.json @@ -0,0 +1,26 @@ +{ + "name": "keybase_proofs", + "category": "social", + "endpoint": "https://keybase.io/_/api/1.0/user/lookup.json?usernames={target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "proofs": "$.them[0].proofs_summary.all[*].namestr" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "username", + "output_type": [ + "username" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + 
"health_check_url": "https://keybase.io", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/leak_lookup.json b/sources/leak_lookup.json new file mode 100644 index 0000000..a18bc77 --- /dev/null +++ b/sources/leak_lookup.json @@ -0,0 +1,32 @@ +{ + "name": "leak_lookup", + "category": "breaches", + "endpoint": "https://leak-lookup.com/api/search", + "method": "POST", + "requires_auth": false, + "selectors": { + "results": "$.message" + }, + "rate_limit": 1.0, + "headers": {}, + "payload_template": { + "query": "{target}", + "type": "email_address" + }, + "api_key_slots": [], + "input_type": "email", + "output_type": [ + "email" + ], + "normalization_map": {}, + "tags": [ + "passive", + "stealth" + ], + "health_check_url": "https://leak-lookup.com", + "expected_status": 200, + "reliability_score": 3, + "is_volatile": true, + "backup_endpoints": [], + "confidence": 0.7 +} \ No newline at end of file diff --git a/sources/leakcheck.json b/sources/leakcheck.json new file mode 100644 index 0000000..f2db1c8 --- /dev/null +++ b/sources/leakcheck.json @@ -0,0 +1,33 @@ +{ + "name": "leakcheck", + "category": "breaches", + "endpoint": "https://leakcheck.io/api/v2/query/{target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "sources": "$.sources" + }, + "rate_limit": 1.0, + "headers": { + "X-API-Key": "{LEAKCHECK_API_KEY}" + }, + "api_key_slots": [ + "{LEAKCHECK_API_KEY}" + ], + "input_type": "email", + "output_type": [ + "email" + ], + "normalization_map": { + "sources": "breach_sources" + }, + "tags": [ + "passive", + "stealth" + ], + "health_check_url": "https://leakcheck.io", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/leakix_search.json b/sources/leakix_search.json new file mode 100644 index 0000000..8baddba --- /dev/null +++ 
b/sources/leakix_search.json @@ -0,0 +1,31 @@ +{ + "name": "leakix_search", + "category": "breaches", + "endpoint": "https://leakix.net/api/search?q={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "leaks": "$.[*].event_source" + }, + "rate_limit": 1.0, + "headers": { + "api-key": "{LEAKIX_API_KEY}" + }, + "api_key_slots": [ + "{LEAKIX_API_KEY}" + ], + "input_type": "domain", + "output_type": [ + "domain", + "ip" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://leakix.net", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/leakstats_pw.json b/sources/leakstats_pw.json new file mode 100644 index 0000000..d73436c --- /dev/null +++ b/sources/leakstats_pw.json @@ -0,0 +1,31 @@ +{ + "name": "leakstats_pw", + "category": "breaches", + "endpoint": "https://leakstats.net/api/password/{target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "count": "$.count" + }, + "rate_limit": 1.0, + "headers": { + "api-key": "{LEAKSTATS_API_KEY}" + }, + "api_key_slots": [ + "{LEAKSTATS_API_KEY}" + ], + "input_type": "hash", + "output_type": [ + "hash" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://leakstats.net", + "expected_status": 200, + "reliability_score": 3, + "is_volatile": true, + "backup_endpoints": [], + "confidence": 0.7 +} \ No newline at end of file diff --git a/sources/mailboxlayer.json b/sources/mailboxlayer.json new file mode 100644 index 0000000..7717203 --- /dev/null +++ b/sources/mailboxlayer.json @@ -0,0 +1,28 @@ +{ + "name": "mailboxlayer", + "category": "email_rep", + "endpoint": "http://apilayer.net/api/check?access_key={MAILBOX_API_KEY}&email={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "score": "$.score" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{MAILBOX_API_KEY}" + ], + 
"input_type": "email", + "output_type": [ + "email" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "http://apilayer.net", + "expected_status": 200, + "reliability_score": 3, + "backup_endpoints": [], + "confidence": 0.7 +} \ No newline at end of file diff --git a/sources/malshare.json b/sources/malshare.json new file mode 100644 index 0000000..bf6f098 --- /dev/null +++ b/sources/malshare.json @@ -0,0 +1,29 @@ +{ + "name": "malshare", + "category": "threat_intel", + "endpoint": "https://malshare.com/api.php?api_key={MALSHARE_API_KEY}&action=search&query={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "hashes": "$.*" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{MALSHARE_API_KEY}" + ], + "input_type": "hash", + "output_type": [ + "hash" + ], + "normalization_map": {}, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://malshare.com", + "expected_status": 200, + "reliability_score": 3, + "backup_endpoints": [], + "confidence": 0.7 +} \ No newline at end of file diff --git a/sources/maltiverse_ip.json b/sources/maltiverse_ip.json new file mode 100644 index 0000000..7728ddf --- /dev/null +++ b/sources/maltiverse_ip.json @@ -0,0 +1,27 @@ +{ + "name": "maltiverse_ip", + "category": "threat_intel", + "endpoint": "https://api.maltiverse.com/ip/{target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "classification": "$.classification" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "ip", + "output_type": [ + "ip" + ], + "normalization_map": {}, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://api.maltiverse.com", + "expected_status": 200, + "reliability_score": 3, + "backup_endpoints": [], + "confidence": 0.7 +} \ No newline at end of file diff --git a/sources/metadefender_ip.json b/sources/metadefender_ip.json new file mode 100644 index 0000000..4ab2a61 --- /dev/null +++ 
b/sources/metadefender_ip.json @@ -0,0 +1,31 @@ +{ + "name": "metadefender_ip", + "category": "threat_intel", + "endpoint": "https://api.metadefender.com/v4/ip/{target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "lookup": "$.lookup_results" + }, + "rate_limit": 1.0, + "headers": { + "apikey": "{METADEFENDER_API_KEY}" + }, + "api_key_slots": [ + "{METADEFENDER_API_KEY}" + ], + "input_type": "ip", + "output_type": [ + "ip" + ], + "normalization_map": {}, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://api.metadefender.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/misp_search.json b/sources/misp_search.json new file mode 100644 index 0000000..0a02101 --- /dev/null +++ b/sources/misp_search.json @@ -0,0 +1,38 @@ +{ + "name": "misp_search", + "category": "threat_intel", + "endpoint": "{MISP_URL}/attributes/restSearch", + "method": "POST", + "requires_auth": true, + "selectors": { + "attributes": "$.Attribute[*].value" + }, + "rate_limit": 1.0, + "headers": { + "Authorization": "{MISP_API_KEY}", + "Content-Type": "application/json" + }, + "payload_template": { + "returnFormat": "json", + "value": "{target}" + }, + "api_key_slots": [ + "{MISP_API_KEY}" + ], + "input_type": "any", + "output_type": [ + "ip", + "domain", + "hash" + ], + "normalization_map": {}, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "{MISP_URL}", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/npm_user.json b/sources/npm_user.json new file mode 100644 index 0000000..af6f9c6 --- /dev/null +++ b/sources/npm_user.json @@ -0,0 +1,26 @@ +{ + "name": "npm_user", + "category": "social", + "endpoint": "https://registry.npmjs.org/-/v1/search?text=maintainer:{target}", + "method": "GET", + "requires_auth": false, + "selectors": { + 
"packages": "$.objects[*].package.name" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "username", + "output_type": [ + "username" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://registry.npmjs.org", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/numverify.json b/sources/numverify.json new file mode 100644 index 0000000..ca3b5f9 --- /dev/null +++ b/sources/numverify.json @@ -0,0 +1,32 @@ +{ + "name": "numverify", + "category": "phone", + "endpoint": "http://apilayer.net/api/validate?access_key={NUMVERIFY_API_KEY}&number={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "valid": "$.valid", + "carrier": "$.carrier" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{NUMVERIFY_API_KEY}" + ], + "input_type": "phone", + "output_type": [ + "phone" + ], + "normalization_map": { + "valid": "phone_valid", + "carrier": "phone_carrier" + }, + "tags": [ + "passive" + ], + "health_check_url": "http://apilayer.net", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/onyphe_datascan.json b/sources/onyphe_datascan.json new file mode 100644 index 0000000..35637a0 --- /dev/null +++ b/sources/onyphe_datascan.json @@ -0,0 +1,32 @@ +{ + "name": "onyphe_datascan", + "category": "scanners", + "endpoint": "https://www.onyphe.io/api/v2/simple/datascan/{target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "results": "$.results" + }, + "rate_limit": 1.0, + "headers": { + "Authorization": "apikey {ONYPHE_API_KEY}" + }, + "api_key_slots": [ + "{ONYPHE_API_KEY}" + ], + "input_type": "ip", + "output_type": [ + "ip", + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive", + "infrastructure" + ], + "health_check_url": "https://www.onyphe.io", + 
"expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/packetstorm.json b/sources/packetstorm.json new file mode 100644 index 0000000..e96abf2 --- /dev/null +++ b/sources/packetstorm.json @@ -0,0 +1,26 @@ +{ + "name": "packetstorm", + "category": "vulns", + "endpoint": "https://packetstormsecurity.com/search/?q={target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "results": "text_lines" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "any", + "output_type": [ + "url" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://packetstormsecurity.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/passivetotal_enrich.json b/sources/passivetotal_enrich.json new file mode 100644 index 0000000..5566e77 --- /dev/null +++ b/sources/passivetotal_enrich.json @@ -0,0 +1,30 @@ +{ + "name": "passivetotal_enrich", + "category": "enrichment", + "endpoint": "https://api.passivetotal.org/v2/enrichment?query={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "tags": "$.tags" + }, + "rate_limit": 1.0, + "headers": { + "Authorization": "Basic {PASSIVETOTAL_AUTH_BASE64}" + }, + "api_key_slots": [ + "{PASSIVETOTAL_AUTH_BASE64}" + ], + "input_type": "domain", + "output_type": [ + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://api.passivetotal.org", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/passivetotal_whois.json b/sources/passivetotal_whois.json new file mode 100644 index 0000000..2f0ad79 --- /dev/null +++ b/sources/passivetotal_whois.json @@ -0,0 +1,33 @@ +{ + "name": "passivetotal_whois", + "category": "whois", + 
"endpoint": "https://api.passivetotal.org/v2/whois?query={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "registrar": "$.registrar" + }, + "rate_limit": 1.0, + "headers": { + "Authorization": "Basic {PASSIVETOTAL_AUTH_BASE64}" + }, + "api_key_slots": [ + "{PASSIVETOTAL_AUTH_BASE64}" + ], + "input_type": "domain", + "output_type": [ + "email", + "domain" + ], + "normalization_map": { + "registrar": "registrar_name" + }, + "tags": [ + "passive" + ], + "health_check_url": "https://api.passivetotal.org", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/phishtank_check.json b/sources/phishtank_check.json new file mode 100644 index 0000000..d318761 --- /dev/null +++ b/sources/phishtank_check.json @@ -0,0 +1,31 @@ +{ + "name": "phishtank_check", + "category": "threat_intel", + "endpoint": "https://checkurl.phishtank.com/checkurl/", + "method": "POST", + "requires_auth": false, + "selectors": { + "in_database": "$.results.in_database" + }, + "rate_limit": 1.0, + "headers": {}, + "payload_template": { + "url": "{target}", + "format": "json" + }, + "api_key_slots": [], + "input_type": "url", + "output_type": [ + "url" + ], + "normalization_map": {}, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://checkurl.phishtank.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/pipl_search.json b/sources/pipl_search.json new file mode 100644 index 0000000..2c98172 --- /dev/null +++ b/sources/pipl_search.json @@ -0,0 +1,30 @@ +{ + "name": "pipl_search", + "category": "enrichment", + "endpoint": "https://api.pipl.com/search/?email={target}&key={PIPL_API_KEY}", + "method": "GET", + "requires_auth": true, + "selectors": { + "person": "$.person" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{PIPL_API_KEY}" + ], + 
"input_type": "email", + "output_type": [ + "username", + "domain", + "phone" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://api.pipl.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/pulsedive.json b/sources/pulsedive.json new file mode 100644 index 0000000..e8c44fe --- /dev/null +++ b/sources/pulsedive.json @@ -0,0 +1,29 @@ +{ + "name": "pulsedive", + "category": "threat_intel", + "endpoint": "https://pulsedive.com/api/info.php?indicator={target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "risk": "$.risk", + "threats": "$.threats" + }, + "rate_limit": 2.0, + "headers": {}, + "api_key_slots": [], + "input_type": "any", + "output_type": [ + "domain", + "ip" + ], + "normalization_map": {}, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://pulsedive.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/pulsedive_analyze.json b/sources/pulsedive_analyze.json new file mode 100644 index 0000000..efaf550 --- /dev/null +++ b/sources/pulsedive_analyze.json @@ -0,0 +1,34 @@ +{ + "name": "pulsedive_analyze", + "category": "threat_intel", + "endpoint": "https://pulsedive.com/api/analyze.php?value={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "risk": "$.risk" + }, + "rate_limit": 1.0, + "headers": { + "key": "{PULSEDIVE_API_KEY}" + }, + "api_key_slots": [ + "{PULSEDIVE_API_KEY}" + ], + "input_type": "any", + "output_type": [ + "ip", + "domain" + ], + "normalization_map": { + "risk": "risk_level" + }, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://pulsedive.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/pypi_user.json 
b/sources/pypi_user.json new file mode 100644 index 0000000..a080111 --- /dev/null +++ b/sources/pypi_user.json @@ -0,0 +1,26 @@ +{ + "name": "pypi_user", + "category": "social", + "endpoint": "https://pypi.org/pypi/{target}/json", + "method": "GET", + "requires_auth": false, + "selectors": { + "info": "$.info" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "username", + "output_type": [ + "username" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://pypi.org", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/recordedfuture_ip.json b/sources/recordedfuture_ip.json new file mode 100644 index 0000000..44a2e81 --- /dev/null +++ b/sources/recordedfuture_ip.json @@ -0,0 +1,33 @@ +{ + "name": "recordedfuture_ip", + "category": "threat_intel", + "endpoint": "https://api.recordedfuture.com/v2/ip/{target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "risk": "$.data.risk.score" + }, + "rate_limit": 1.0, + "headers": { + "X-RFToken": "{RF_TOKEN}" + }, + "api_key_slots": [ + "{RF_TOKEN}" + ], + "input_type": "ip", + "output_type": [ + "ip" + ], + "normalization_map": { + "score": "rf_risk_score" + }, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://api.recordedfuture.com", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/reddit_user.json b/sources/reddit_user.json new file mode 100644 index 0000000..a403170 --- /dev/null +++ b/sources/reddit_user.json @@ -0,0 +1,28 @@ +{ + "name": "reddit_user", + "category": "social", + "endpoint": "https://www.reddit.com/user/{target}/about.json", + "method": "GET", + "requires_auth": false, + "selectors": { + "karma": "$.data.total_karma" + }, + "rate_limit": 2.0, + "headers": { + "User-Agent": "NOX" + }, + "api_key_slots": 
[], + "input_type": "username", + "output_type": [ + "username" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://www.reddit.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/robtex_ip.json b/sources/robtex_ip.json new file mode 100644 index 0000000..089fdad --- /dev/null +++ b/sources/robtex_ip.json @@ -0,0 +1,27 @@ +{ + "name": "robtex_ip", + "category": "network", + "endpoint": "https://freeapi.robtex.com/ipquery/{target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "as": "$.asname" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "ip", + "output_type": [ + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive", + "fast" + ], + "health_check_url": "https://freeapi.robtex.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/scamwatcher.json b/sources/scamwatcher.json new file mode 100644 index 0000000..1099df2 --- /dev/null +++ b/sources/scamwatcher.json @@ -0,0 +1,30 @@ +{ + "name": "scamwatcher", + "category": "threat_intel", + "endpoint": "https://www.scamwatcher.com/scam/search?q={target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "results": "text_lines" + }, + "rate_limit": 1.0, + "headers": { + "User-Agent": "Mozilla/5.0" + }, + "api_key_slots": [], + "input_type": "any", + "output_type": [ + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://www.scamwatcher.com", + "expected_status": 200, + "reliability_score": 2, + "is_volatile": true, + "backup_endpoints": [], + "confidence": 0.55 +} \ No newline at end of file diff --git a/sources/scylla_sh_search.json b/sources/scylla_sh_search.json new file mode 100644 index 0000000..eee1826 --- /dev/null +++ 
b/sources/scylla_sh_search.json @@ -0,0 +1,31 @@ +{ + "name": "scylla_sh_search", + "category": "breaches", + "endpoint": "https://scylla.sh/search?q={target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "results": "$.*" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "email", + "output_type": [ + "email", + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive", + "stealth" + ], + "health_check_url": "https://scylla.sh", + "expected_status": 200, + "reliability_score": 2, + "is_volatile": true, + "backup_endpoints": [ + "https://scylla.sh/api/search?q={target}" + ], + "confidence": 0.55 +} \ No newline at end of file diff --git a/sources/securitytrails_history.json b/sources/securitytrails_history.json new file mode 100644 index 0000000..ea1a493 --- /dev/null +++ b/sources/securitytrails_history.json @@ -0,0 +1,32 @@ +{ + "name": "securitytrails_history", + "category": "dns_recon", + "endpoint": "https://api.securitytrails.com/v1/history/{target}/dns/a", + "method": "GET", + "requires_auth": true, + "selectors": { + "history": "$.records[*].values[*].ip" + }, + "rate_limit": 1.0, + "headers": { + "APIKEY": "{SECURITYTRAILS_API_KEY}" + }, + "api_key_slots": [ + "{SECURITYTRAILS_API_KEY}" + ], + "input_type": "domain", + "output_type": [ + "ip" + ], + "normalization_map": { + "ip": "historical_ip" + }, + "tags": [ + "passive" + ], + "health_check_url": "https://api.securitytrails.com", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/securitytrails_sub.json b/sources/securitytrails_sub.json new file mode 100644 index 0000000..7815242 --- /dev/null +++ b/sources/securitytrails_sub.json @@ -0,0 +1,30 @@ +{ + "name": "securitytrails_sub", + "category": "dns_recon", + "endpoint": "https://api.securitytrails.com/v1/domain/{target}/subdomains", + "method": "GET", + "requires_auth": true, + "selectors": { + 
"subdomains": "$.subdomains" + }, + "rate_limit": 1.0, + "headers": { + "APIKEY": "{SECURITYTRAILS_API_KEY}" + }, + "api_key_slots": [ + "{SECURITYTRAILS_API_KEY}" + ], + "input_type": "domain", + "output_type": [ + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://api.securitytrails.com", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/shodan_dns.json b/sources/shodan_dns.json new file mode 100644 index 0000000..36ea6e5 --- /dev/null +++ b/sources/shodan_dns.json @@ -0,0 +1,29 @@ +{ + "name": "shodan_dns", + "category": "dns_recon", + "endpoint": "https://api.shodan.io/dns/domain/{target}?key={SHODAN_API_KEY}", + "method": "GET", + "requires_auth": true, + "selectors": { + "subdomains": "$.subdomains" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{SHODAN_API_KEY}" + ], + "input_type": "domain", + "output_type": [ + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive", + "infrastructure" + ], + "health_check_url": "https://api.shodan.io", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/shodan_exploits.json b/sources/shodan_exploits.json new file mode 100644 index 0000000..0f52170 --- /dev/null +++ b/sources/shodan_exploits.json @@ -0,0 +1,28 @@ +{ + "name": "shodan_exploits", + "category": "vulns", + "endpoint": "https://exploits.shodan.io/api/search?query={target}&key={SHODAN_API_KEY}", + "method": "GET", + "requires_auth": true, + "selectors": { + "total": "$.total" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{SHODAN_API_KEY}" + ], + "input_type": "cve", + "output_type": [ + "cve" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://exploits.shodan.io", + "expected_status": 200, + "reliability_score": 5, + 
"backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/shodan_host.json b/sources/shodan_host.json new file mode 100644 index 0000000..955578f --- /dev/null +++ b/sources/shodan_host.json @@ -0,0 +1,34 @@ +{ + "name": "shodan_host", + "category": "scanners", + "endpoint": "https://api.shodan.io/shodan/host/{target}?key={SHODAN_API_KEY}", + "method": "GET", + "requires_auth": true, + "selectors": { + "ports": "$.ports", + "vulns": "$.vulns" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{SHODAN_API_KEY}" + ], + "input_type": "ip", + "output_type": [ + "domain", + "vuln" + ], + "normalization_map": { + "ports": "open_ports", + "vulns": "cve_list" + }, + "tags": [ + "passive", + "infrastructure" + ], + "health_check_url": "https://api.shodan.io", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/shodan_search.json b/sources/shodan_search.json new file mode 100644 index 0000000..6dbca8a --- /dev/null +++ b/sources/shodan_search.json @@ -0,0 +1,31 @@ +{ + "name": "shodan_search", + "category": "scanners", + "endpoint": "https://api.shodan.io/shodan/host/search?key={SHODAN_API_KEY}&query={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "ips": "$.matches[*].ip_str" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{SHODAN_API_KEY}" + ], + "input_type": "domain", + "output_type": [ + "ip" + ], + "normalization_map": { + "ip_str": "ip_address" + }, + "tags": [ + "passive", + "infrastructure" + ], + "health_check_url": "https://api.shodan.io", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/snusbase.json b/sources/snusbase.json new file mode 100644 index 0000000..01e876f --- /dev/null +++ b/sources/snusbase.json @@ -0,0 +1,46 @@ +{ + "name": "snusbase", + "category": 
"breaches", + "endpoint": "https://api.snusbase.com/data/search", + "method": "POST", + "requires_auth": true, + "selectors": { + "leaks": "$.results" + }, + "rate_limit": 1.0, + "headers": { + "Auth": "{SNUSBASE_API_KEY}", + "Content-Type": "application/json" + }, + "payload_template": { + "terms": [ + "{target}" + ], + "types": [ + "email" + ] + }, + "api_key_slots": [ + "{SNUSBASE_API_KEY}" + ], + "input_type": "email", + "output_type": [ + "email", + "username" + ], + "normalization_map": { + "email": "email_address", + "username": "username", + "password": "plaintext_password", + "hash": "password_hash" + }, + "tags": [ + "passive", + "stealth" + ], + "health_check_url": "https://api.snusbase.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/spycloud_breach.json b/sources/spycloud_breach.json new file mode 100644 index 0000000..c7e406a --- /dev/null +++ b/sources/spycloud_breach.json @@ -0,0 +1,38 @@ +{ + "name": "spycloud_breach", + "category": "breaches", + "endpoint": "https://api.spycloud.io/enterprise-v2/breach/data/emails/{target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "results": "$.results" + }, + "rate_limit": 1.0, + "headers": { + "X-API-Key": "{SPYCLOUD_API_KEY}" + }, + "api_key_slots": [ + "{SPYCLOUD_API_KEY}" + ], + "input_type": "email", + "output_type": [ + "email", + "username", + "ip" + ], + "normalization_map": { + "email": "email_address", + "username": "username", + "password": "plaintext_password", + "ip_addresses": "ip_address" + }, + "tags": [ + "passive", + "stealth" + ], + "health_check_url": "https://api.spycloud.io", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/spyonweb.json b/sources/spyonweb.json new file mode 100644 index 0000000..f0353a8 --- /dev/null +++ b/sources/spyonweb.json @@ -0,0 +1,28 @@ 
+{ + "name": "spyonweb", + "category": "dns_recon", + "endpoint": "https://api.spyonweb.com/v1/summary/{target}?access_token={SPYONWEB_API_KEY}", + "method": "GET", + "requires_auth": true, + "selectors": { + "adsense": "$.result.adsense" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{SPYONWEB_API_KEY}" + ], + "input_type": "domain", + "output_type": [ + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://api.spyonweb.com", + "expected_status": 200, + "reliability_score": 3, + "backup_endpoints": [], + "confidence": 0.7 +} \ No newline at end of file diff --git a/sources/spyse_domain.json b/sources/spyse_domain.json new file mode 100644 index 0000000..773e40f --- /dev/null +++ b/sources/spyse_domain.json @@ -0,0 +1,30 @@ +{ + "name": "spyse_domain", + "category": "scanners", + "endpoint": "https://api.spyse.com/v1/domain/details/{target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "asn": "$.data.asn" + }, + "rate_limit": 1.0, + "headers": { + "Authorization": "Bearer {SPYSE_API_KEY}" + }, + "api_key_slots": [ + "{SPYSE_API_KEY}" + ], + "input_type": "domain", + "output_type": [ + "ip" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://api.spyse.com", + "expected_status": 200, + "reliability_score": 3, + "backup_endpoints": [], + "confidence": 0.7 +} \ No newline at end of file diff --git a/sources/spyse_ip.json b/sources/spyse_ip.json new file mode 100644 index 0000000..1925cd2 --- /dev/null +++ b/sources/spyse_ip.json @@ -0,0 +1,30 @@ +{ + "name": "spyse_ip", + "category": "scanners", + "endpoint": "https://api.spyse.com/v1/ip/details/{target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "geo": "$.data.geo" + }, + "rate_limit": 1.0, + "headers": { + "Authorization": "Bearer {SPYSE_API_KEY}" + }, + "api_key_slots": [ + "{SPYSE_API_KEY}" + ], + "input_type": "ip", + "output_type": [ + "ip" + ], + "normalization_map": 
{}, + "tags": [ + "passive" + ], + "health_check_url": "https://api.spyse.com", + "expected_status": 200, + "reliability_score": 3, + "backup_endpoints": [], + "confidence": 0.7 +} \ No newline at end of file diff --git a/sources/sublist3r_api.json b/sources/sublist3r_api.json new file mode 100644 index 0000000..b4d01d9 --- /dev/null +++ b/sources/sublist3r_api.json @@ -0,0 +1,27 @@ +{ + "name": "sublist3r_api", + "category": "dns_recon", + "endpoint": "https://api.sublist3r.com/search.php?domain={target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "subdomains": "$.*" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "domain", + "output_type": [ + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://api.sublist3r.com", + "expected_status": 200, + "reliability_score": 3, + "is_volatile": true, + "backup_endpoints": [], + "confidence": 0.7 +} \ No newline at end of file diff --git a/sources/threatconnect_search.json b/sources/threatconnect_search.json new file mode 100644 index 0000000..924654f --- /dev/null +++ b/sources/threatconnect_search.json @@ -0,0 +1,32 @@ +{ + "name": "threatconnect_search", + "category": "threat_intel", + "endpoint": "https://api.threatconnect.com/v2/indicators/{target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "data": "$.data" + }, + "rate_limit": 1.0, + "headers": { + "Authorization": "TC {TC_API_KEY}:{TC_SIGNATURE}" + }, + "api_key_slots": [ + "{TC_API_KEY}" + ], + "input_type": "any", + "output_type": [ + "ip", + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://api.threatconnect.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/threatcrowd_domain.json b/sources/threatcrowd_domain.json new file mode 100644 index 0000000..8f257a2 --- /dev/null +++ 
b/sources/threatcrowd_domain.json @@ -0,0 +1,32 @@ +{ + "name": "threatcrowd_domain", + "category": "threat_intel", + "endpoint": "https://www.threatcrowd.org/searchApi/v2/domain/report/?domain={target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "ips": "$.resolutions[*].ip_address" + }, + "rate_limit": 5.0, + "headers": {}, + "api_key_slots": [], + "input_type": "domain", + "output_type": [ + "ip" + ], + "normalization_map": {}, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://www.threatcrowd.org", + "expected_status": 200, + "reliability_score": 3, + "is_volatile": true, + "bypass_required": [ + "cloudflare" + ], + "user_agent_type": "browser", + "backup_endpoints": [], + "confidence": 0.7 +} \ No newline at end of file diff --git a/sources/threatcrowd_email.json b/sources/threatcrowd_email.json new file mode 100644 index 0000000..fcc7dfc --- /dev/null +++ b/sources/threatcrowd_email.json @@ -0,0 +1,32 @@ +{ + "name": "threatcrowd_email", + "category": "threat_intel", + "endpoint": "https://www.threatcrowd.org/searchApi/v2/email/report/?email={target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "domains": "$.domains" + }, + "rate_limit": 5.0, + "headers": {}, + "api_key_slots": [], + "input_type": "email", + "output_type": [ + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://www.threatcrowd.org", + "expected_status": 200, + "reliability_score": 3, + "is_volatile": true, + "bypass_required": [ + "cloudflare" + ], + "user_agent_type": "browser", + "backup_endpoints": [], + "confidence": 0.7 +} \ No newline at end of file diff --git a/sources/threatminer_domain.json b/sources/threatminer_domain.json new file mode 100644 index 0000000..9920e94 --- /dev/null +++ b/sources/threatminer_domain.json @@ -0,0 +1,28 @@ +{ + "name": "threatminer_domain", + "category": "threat_intel", + "endpoint": 
"https://api.threatminer.org/v2/domain.php?q={target}&rt=1", + "method": "GET", + "requires_auth": false, + "selectors": { + "ips": "$.results" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "domain", + "output_type": [ + "ip" + ], + "normalization_map": {}, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://api.threatminer.org", + "expected_status": 200, + "reliability_score": 3, + "is_volatile": true, + "backup_endpoints": [], + "confidence": 0.7 +} \ No newline at end of file diff --git a/sources/threatminer_ip.json b/sources/threatminer_ip.json new file mode 100644 index 0000000..d9e454b --- /dev/null +++ b/sources/threatminer_ip.json @@ -0,0 +1,28 @@ +{ + "name": "threatminer_ip", + "category": "threat_intel", + "endpoint": "https://api.threatminer.org/v2/host.php?q={target}&rt=1", + "method": "GET", + "requires_auth": false, + "selectors": { + "urls": "$.results" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "ip", + "output_type": [ + "url" + ], + "normalization_map": {}, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://api.threatminer.org", + "expected_status": 200, + "reliability_score": 3, + "is_volatile": true, + "backup_endpoints": [], + "confidence": 0.7 +} \ No newline at end of file diff --git a/sources/threatportal.json b/sources/threatportal.json new file mode 100644 index 0000000..ae4bcab --- /dev/null +++ b/sources/threatportal.json @@ -0,0 +1,33 @@ +{ + "name": "threatportal", + "category": "threat_intel", + "endpoint": "https://threatportal.io/api/v1/search?q={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "results": "$.results" + }, + "rate_limit": 1.0, + "headers": { + "Authorization": "Bearer {TP_API_KEY}" + }, + "api_key_slots": [ + "{TP_API_KEY}" + ], + "input_type": "any", + "output_type": [ + "ip", + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive", + "threat" + ], + 
"health_check_url": "https://threatportal.io", + "expected_status": 200, + "reliability_score": 3, + "is_volatile": true, + "backup_endpoints": [], + "confidence": 0.7 +} \ No newline at end of file diff --git a/sources/tines_breach.json b/sources/tines_breach.json new file mode 100644 index 0000000..0e5cf30 --- /dev/null +++ b/sources/tines_breach.json @@ -0,0 +1,30 @@ +{ + "name": "tines_breach", + "category": "breaches", + "endpoint": "https://api.tines.com/breaches/{target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "breaches": "$.breaches" + }, + "rate_limit": 1.0, + "headers": { + "Authorization": "Bearer {TINES_API_KEY}" + }, + "api_key_slots": [ + "{TINES_API_KEY}" + ], + "input_type": "email", + "output_type": [ + "email" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://api.tines.com", + "expected_status": 200, + "reliability_score": 3, + "backup_endpoints": [], + "confidence": 0.7 +} \ No newline at end of file diff --git a/sources/twitter_v2.json b/sources/twitter_v2.json new file mode 100644 index 0000000..baed8b2 --- /dev/null +++ b/sources/twitter_v2.json @@ -0,0 +1,30 @@ +{ + "name": "twitter_v2", + "category": "social", + "endpoint": "https://api.twitter.com/2/users/by/username/{target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "id": "$.data.id" + }, + "rate_limit": 1.0, + "headers": { + "Authorization": "Bearer {TWITTER_BEARER_TOKEN}" + }, + "api_key_slots": [ + "{TWITTER_BEARER_TOKEN}" + ], + "input_type": "username", + "output_type": [ + "username" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://api.twitter.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/urlscan_search.json b/sources/urlscan_search.json new file mode 100644 index 0000000..8c77012 --- /dev/null +++ b/sources/urlscan_search.json @@ -0,0 +1,28 @@ 
+{ + "name": "urlscan_search", + "category": "url_analysis", + "endpoint": "https://urlscan.io/api/v1/search/?q={target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "urls": "$.results[*].page.url" + }, + "rate_limit": 2.0, + "headers": {}, + "api_key_slots": [], + "input_type": "domain", + "output_type": [ + "url", + "ip", + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://urlscan.io", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/urlvoid.json b/sources/urlvoid.json new file mode 100644 index 0000000..b3a19fe --- /dev/null +++ b/sources/urlvoid.json @@ -0,0 +1,29 @@ +{ + "name": "urlvoid", + "category": "threat_intel", + "endpoint": "https://api.urlvoid.com/api1000/{URLVOID_API_KEY}/host/{target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "detections": "$.detections" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{URLVOID_API_KEY}" + ], + "input_type": "domain", + "output_type": [ + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://api.urlvoid.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/viewdns_reverse_ip.json b/sources/viewdns_reverse_ip.json new file mode 100644 index 0000000..a7bcea0 --- /dev/null +++ b/sources/viewdns_reverse_ip.json @@ -0,0 +1,28 @@ +{ + "name": "viewdns_reverse_ip", + "category": "dns_recon", + "endpoint": "https://api.viewdns.info/reverseip/?host={target}&apikey={VIEWDNS_API_KEY}&output=json", + "method": "GET", + "requires_auth": true, + "selectors": { + "domains": "$.response.domains[*].name" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{VIEWDNS_API_KEY}" + ], + "input_type": "ip", + "output_type": [ + "domain" + ], + 
"normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://api.viewdns.info", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/vigilante_pw.json b/sources/vigilante_pw.json new file mode 100644 index 0000000..4c9cd8b --- /dev/null +++ b/sources/vigilante_pw.json @@ -0,0 +1,28 @@ +{ + "name": "vigilante_pw", + "category": "breaches", + "endpoint": "https://vigilante.pw/api/search?q={target}", + "method": "GET", + "requires_auth": false, + "selectors": { + "results": "$.results" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "email", + "output_type": [ + "email" + ], + "normalization_map": {}, + "tags": [ + "passive", + "stealth" + ], + "health_check_url": "https://vigilante.pw", + "expected_status": 200, + "reliability_score": 2, + "is_volatile": true, + "backup_endpoints": [], + "confidence": 0.55 +} \ No newline at end of file diff --git a/sources/virustotal_domain.json b/sources/virustotal_domain.json new file mode 100644 index 0000000..f207fbb --- /dev/null +++ b/sources/virustotal_domain.json @@ -0,0 +1,33 @@ +{ + "name": "virustotal_domain", + "category": "threat_intel", + "endpoint": "https://www.virustotal.com/api/v3/domains/{target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "malicious": "$.data.attributes.last_analysis_stats.malicious" + }, + "rate_limit": 15.0, + "headers": { + "x-apikey": "{VIRUSTOTAL_API_KEY}" + }, + "api_key_slots": [ + "{VIRUSTOTAL_API_KEY}" + ], + "input_type": "domain", + "output_type": [ + "domain" + ], + "normalization_map": { + "malicious": "malicious_count" + }, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://www.virustotal.com", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/virustotal_ip.json b/sources/virustotal_ip.json 
new file mode 100644 index 0000000..2f894ab --- /dev/null +++ b/sources/virustotal_ip.json @@ -0,0 +1,33 @@ +{ + "name": "virustotal_ip", + "category": "threat_intel", + "endpoint": "https://www.virustotal.com/api/v3/ip_addresses/{target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "reputation": "$.data.attributes.reputation" + }, + "rate_limit": 15.0, + "headers": { + "x-apikey": "{VIRUSTOTAL_API_KEY}" + }, + "api_key_slots": [ + "{VIRUSTOTAL_API_KEY}" + ], + "input_type": "ip", + "output_type": [ + "ip" + ], + "normalization_map": { + "reputation": "vt_reputation" + }, + "tags": [ + "passive", + "threat" + ], + "health_check_url": "https://www.virustotal.com", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/vulners_search.json b/sources/vulners_search.json new file mode 100644 index 0000000..078b06c --- /dev/null +++ b/sources/vulners_search.json @@ -0,0 +1,30 @@ +{ + "name": "vulners_search", + "category": "threat_intel", + "endpoint": "https://vulners.com/api/v3/search/lucene/?query={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "results": "$.data.search[*]._source.title" + }, + "rate_limit": 1.0, + "headers": { + "X-Vulners-Api-Key": "{VULNERS_API_KEY}" + }, + "api_key_slots": [ + "{VULNERS_API_KEY}" + ], + "input_type": "cve", + "output_type": [ + "cve" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://vulners.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/wayback_machine.json b/sources/wayback_machine.json new file mode 100644 index 0000000..4920c33 --- /dev/null +++ b/sources/wayback_machine.json @@ -0,0 +1,26 @@ +{ + "name": "wayback_machine", + "category": "archive", + "endpoint": "https://archive.org/wayback/available?url={target}", + "method": "GET", + 
"requires_auth": false, + "selectors": { + "snapshot": "$.archived_snapshots.closest.url" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [], + "input_type": "url", + "output_type": [ + "url" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://archive.org", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/whois_freaks.json b/sources/whois_freaks.json new file mode 100644 index 0000000..223c5d6 --- /dev/null +++ b/sources/whois_freaks.json @@ -0,0 +1,28 @@ +{ + "name": "whois_freaks", + "category": "whois", + "endpoint": "https://whoisfreaks.com/api/v1/whois?apiKey={WF_API_KEY}&whois=live&domainName={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "emails": "$.whois_record.registrant_contact.email_address" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{WF_API_KEY}" + ], + "input_type": "domain", + "output_type": [ + "email" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://whoisfreaks.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/whoisxml_api.json b/sources/whoisxml_api.json new file mode 100644 index 0000000..ed6b5ab --- /dev/null +++ b/sources/whoisxml_api.json @@ -0,0 +1,31 @@ +{ + "name": "whoisxml_api", + "category": "whois", + "endpoint": "https://www.whoisxmlapi.com/whoisserver/WhoisService?apiKey={WHOISXML_API_KEY}&domainName={target}&outputFormat=JSON", + "method": "GET", + "requires_auth": true, + "selectors": { + "created": "$.WhoisRecord.createdDate" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{WHOISXML_API_KEY}" + ], + "input_type": "domain", + "output_type": [ + "email", + "domain" + ], + "normalization_map": { + "createdDate": "registration_date" + }, + "tags": [ + "passive" + ], + 
"health_check_url": "https://www.whoisxmlapi.com", + "expected_status": 200, + "reliability_score": 5, + "backup_endpoints": [], + "confidence": 1.0 +} \ No newline at end of file diff --git a/sources/whoxy_whois.json b/sources/whoxy_whois.json new file mode 100644 index 0000000..1545844 --- /dev/null +++ b/sources/whoxy_whois.json @@ -0,0 +1,29 @@ +{ + "name": "whoxy_whois", + "category": "whois", + "endpoint": "https://api.whoxy.com/?key={WHOXY_API_KEY}&whois={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "registrar": "$.registrar_name" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{WHOXY_API_KEY}" + ], + "input_type": "domain", + "output_type": [ + "email", + "domain" + ], + "normalization_map": {}, + "tags": [ + "passive" + ], + "health_check_url": "https://api.whoxy.com", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/zerobounce.json b/sources/zerobounce.json new file mode 100644 index 0000000..31f039d --- /dev/null +++ b/sources/zerobounce.json @@ -0,0 +1,31 @@ +{ + "name": "zerobounce", + "category": "email_rep", + "endpoint": "https://api.zerobounce.net/v2/validate?api_key={ZEROBOUNCE_API_KEY}&email={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "status": "$.status" + }, + "rate_limit": 1.0, + "headers": {}, + "api_key_slots": [ + "{ZEROBOUNCE_API_KEY}" + ], + "input_type": "email", + "output_type": [ + "email" + ], + "normalization_map": { + "status": "email_validity" + }, + "tags": [ + "passive", + "fast" + ], + "health_check_url": "https://api.zerobounce.net", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/sources/zoomeye_host.json b/sources/zoomeye_host.json new file mode 100644 index 0000000..10295a4 --- /dev/null +++ b/sources/zoomeye_host.json @@ -0,0 +1,31 @@ +{ + "name": 
"zoomeye_host", + "category": "scanners", + "endpoint": "https://api.zoomeye.org/host/search?query={target}", + "method": "GET", + "requires_auth": true, + "selectors": { + "hosts": "$.matches[*].ip" + }, + "rate_limit": 1.0, + "headers": { + "API-KEY": "{ZOOMEYE_API_KEY}" + }, + "api_key_slots": [ + "{ZOOMEYE_API_KEY}" + ], + "input_type": "domain", + "output_type": [ + "ip" + ], + "normalization_map": {}, + "tags": [ + "passive", + "infrastructure" + ], + "health_check_url": "https://api.zoomeye.org", + "expected_status": 200, + "reliability_score": 4, + "backup_endpoints": [], + "confidence": 0.85 +} \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..08cdd9a --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""tests/__init__.py""" diff --git a/tests/test_cracker.py b/tests/test_cracker.py new file mode 100644 index 0000000..f7cdf2e --- /dev/null +++ b/tests/test_cracker.py @@ -0,0 +1,26 @@ +"""tests/test_cracker.py — Unit tests for hash detection.""" +import sys, os +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from sources.helpers.cracker import detect_hash + + +def test_md5(): + assert detect_hash("5f4dcc3b5aa765d61d8327deb882cf99") == "md5" + +def test_sha1(): + assert detect_hash("aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d") == "sha1" + +def test_sha256(): + assert detect_hash("5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8") == "sha256" + +def test_bcrypt(): + assert detect_hash("$2b$12$EixZaYVK1fsbw1ZfbX3OXePaWxn96p36WQoeG6Lruj3vjPGga31lW") == "bcrypt" + +def test_non_hash(): + assert detect_hash("notahash") is None + assert detect_hash("") is None + assert detect_hash("hello@world.com") is None + +def test_uppercase_md5(): + assert detect_hash("5F4DCC3B5AA765D61D8327DEB882CF99") == "md5" diff --git a/tests/test_detect.py b/tests/test_detect.py new file mode 100644 index 0000000..7d20a65 --- /dev/null +++ b/tests/test_detect.py @@ -0,0 +1,28 @@ 
+"""tests/test_detect.py — Unit tests for input type detection.""" +import sys, os +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from nox import Detect + + +def test_email(): + assert Detect.qtype("user@example.com") == "email" + assert Detect.qtype("first.last+tag@sub.domain.org") == "email" + +def test_domain(): + assert Detect.qtype("example.com") == "domain" + assert Detect.qtype("sub.example.co.uk") == "domain" + +def test_ip(): + assert Detect.qtype("192.168.1.1") == "ip" + assert Detect.qtype("8.8.8.8") == "ip" + +def test_hash_md5(): + assert Detect.qtype("5f4dcc3b5aa765d61d8327deb882cf99") == "hash" + +def test_hash_sha256(): + assert Detect.qtype("5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8") == "hash" + +def test_username(): + assert Detect.qtype("johndoe") == "username" + assert Detect.qtype("john_doe_99") == "username" diff --git a/tests/test_identity.py b/tests/test_identity.py new file mode 100644 index 0000000..6cc3529 --- /dev/null +++ b/tests/test_identity.py @@ -0,0 +1,45 @@ +"""tests/test_identity.py — Unit tests for IdentityResolver Union-Find clustering.""" +import sys, os +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from nox import Record, IdentityResolver + + +def _rec(email="", username="", password="", source="S"): + return Record(source=source, email=email, username=username, password=password) + + +def test_single_record_one_cluster(): + records = [_rec(email="a@b.com")] + profiles = IdentityResolver(records).resolve() + assert len(profiles) == 1 + + +def test_shared_password_merges_clusters(): + # password must be > 6 chars to be used as a pivot key + records = [ + _rec(email="a@b.com", password="shared_password_long"), + _rec(email="c@d.com", password="shared_password_long"), + ] + profiles = IdentityResolver(records).resolve() + assert len(profiles) == 1 + + +def test_distinct_records_separate_clusters(): + records = [ + _rec(email="a@b.com", 
password="uniquepassword1"), + _rec(email="c@d.com", password="uniquepassword2"), + ] + profiles = IdentityResolver(records).resolve() + assert len(profiles) == 2 + + +def test_empty_records(): + profiles = IdentityResolver([]).resolve() + assert profiles == [] + + +def test_hvt_flag_propagates(): + records = [_rec(email="admin@corp.com", password="secretpass")] + profiles = IdentityResolver(records).resolve() + assert profiles[0].is_hvt is True diff --git a/tests/test_reporting.py b/tests/test_reporting.py new file mode 100644 index 0000000..8bd3c10 --- /dev/null +++ b/tests/test_reporting.py @@ -0,0 +1,39 @@ +"""tests/test_reporting.py — Unit tests for build_exec_summary.""" +import sys, os +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from sources.helpers.reporting import build_exec_summary + + +def test_empty_records(): + summary = build_exec_summary({"records": [], "analysis": {}, "scan_meta": {}}) + assert summary["total_records"] == 0 + assert summary["cleartext_passwords"] == 0 + assert summary["nodes_discovered"] == 0 + + +def test_counts_cleartext(): + class R: + email = "a@b.com"; username = ""; password = "secret"; risk_score = 50.0; is_hvt = False + summary = build_exec_summary({"records": [R()], "analysis": {}, "scan_meta": {}}) + assert summary["cleartext_passwords"] == 1 + assert summary["total_records"] == 1 + + +def test_hvt_count(): + class R: + email = "admin@corp.com"; username = ""; password = ""; risk_score = 80.0; is_hvt = True + summary = build_exec_summary({"records": [R()], "analysis": {}, "scan_meta": {}}) + assert summary["hvt_count"] >= 1 + + +def test_bucket_critical(): + class R: + email = "x@y.com"; username = ""; password = "pw"; risk_score = 95.0; is_hvt = False + summary = build_exec_summary({"records": [R()], "analysis": {}, "scan_meta": {}}) + assert summary["buckets"]["Critical"] == 1 + + +def test_elapsed_formatting(): + summary = build_exec_summary({"records": [], "analysis": {}, "scan_meta": 
{"elapsed_seconds": 12.5}}) + assert summary["elapsed"] == "12.5s" diff --git a/tests/test_risk.py b/tests/test_risk.py new file mode 100644 index 0000000..317d684 --- /dev/null +++ b/tests/test_risk.py @@ -0,0 +1,38 @@ +"""tests/test_risk.py — Unit tests for RiskEngine boundary values.""" +import sys, os +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from nox import Record, RiskEngine, Severity + + +def _make(password="", breach_date="", source="TestSource", email="test@example.com"): + r = Record(source=source, email=email, password=password, breach_date=breach_date) + return RiskEngine.score(r) + + +def test_score_returns_float(): + r = _make(password="hunter2") + assert isinstance(r.risk_score, float) + + +def test_score_in_range(): + r = _make(password="hunter2") + assert 0.0 <= r.risk_score <= 100.0 + + +def test_no_password_lower_score(): + with_pw = _make(password="secret123") + without_pw = _make(password="") + assert with_pw.risk_score >= without_pw.risk_score + + +def test_cleartext_password_raises_severity(): + r = _make(password="P@ssw0rd!") + assert r.severity in (Severity.HIGH, Severity.CRITICAL, Severity.MEDIUM) + + +def test_persistence_does_not_crash(): + records = [_make(password="reused", email="a@b.com"), + _make(password="reused", email="a@b.com")] + result = RiskEngine.apply_persistence(records) + assert len(result) == 2 diff --git a/tests/test_scanner.py b/tests/test_scanner.py new file mode 100644 index 0000000..a79ec2d --- /dev/null +++ b/tests/test_scanner.py @@ -0,0 +1,90 @@ +"""tests/test_scanner.py — Unit tests for AvalancheScanner dedup and depth cap.""" +import asyncio +import sys, os +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from sources.helpers.scanner import AvalancheScanner, _extract_ids_from_text as _extract_new_ids, _ids_from_records + + +# ── _extract_new_ids ────────────────────────────────────────────────── + +def test_extract_email(): + ids = _extract_new_ids("contact 
user@example.com for info") + assert ("user@example.com", "email") in ids + + +def test_extract_username_from_github(): + ids = _extract_new_ids("see github.com/johndoe for code") + assert ("johndoe", "username") in ids + + +def test_extract_no_false_positives(): + ids = _extract_new_ids("no identifiers here at all") + assert ids == [] + + +# ── seen_assets dedup ───────────────────────────────────────────────── + +class _FakeOrchestrator: + """Minimal orchestrator stub — records how many times each asset is scanned.""" + def __init__(self): + self.scan_calls = [] + self.dorking_engine = _FakeDorkingEngine() + + async def _full_async_scan(self, asset, qtype): + self.scan_calls.append(asset) + return [] + + def dork(self, asset, query_type=None): + return [] + + def scrape(self, asset, query_type=None): + return {"pastes": [], "credentials": [], "hashes": [], "telegram": [], "dork_misconfigs": []} + + +class _FakeDorkingEngine: + async def async_search(self, session, asset, qtype): + return [] + + +def test_seen_assets_prevents_duplicate_scan(): + orc = _FakeOrchestrator() + scanner = AvalancheScanner(orc) + + async def _run(): + scanner.seen_assets.add("target@example.com") + await asyncio.gather( + scanner._process("target@example.com", depth=0, parent=None, found_in="seed"), + scanner._process("target@example.com", depth=0, parent=None, found_in="seed"), + ) + + asyncio.run(_run()) + # Should only have been scanned once (or zero times since it was pre-added to seen_assets) + assert orc.scan_calls.count("target@example.com") <= 1 + + +def test_depth_cap_respected(): + orc = _FakeOrchestrator() + scanner = AvalancheScanner(orc) + + async def _run(): + await scanner._process("deep@example.com", depth=99, parent=None, found_in="seed") + + asyncio.run(_run()) + assert "deep@example.com" not in orc.scan_calls + + +def test_global_dork_url_dedup(): + orc = _FakeOrchestrator() + scanner = AvalancheScanner(orc) + scanner._seen_dork_urls.add("https://example.com/leak") + + 
# Simulate accumulating a hit with a URL already seen + hit = {"url": "https://example.com/leak", "title": "Leak", "snippet": ""} + initial_len = len(scanner._dork_hits) + url = hit.get("url", "") + if url and url not in scanner._seen_dork_urls: + scanner._seen_dork_urls.add(url) + scanner._dork_hits.append(hit) + + assert len(scanner._dork_hits) == initial_len # not added — already seen