Merge branch 'master' into g_CiOptionExtract

This commit is contained in:
Maxime Meignan
2023-10-06 10:24:05 +02:00
committed by GitHub
3 changed files with 51 additions and 27 deletions
+1 -1
View File
@@ -65,7 +65,7 @@ void SaveNtoskrnlOffsetsToFile(TCHAR* ntoskrnlOffsetFilename) {
for (int i = 0; i < _SUPPORTED_NTOSKRNL_OFFSETS_END; i++) { for (int i = 0; i < _SUPPORTED_NTOSKRNL_OFFSETS_END; i++) {
_ftprintf(offsetFileStream, TEXT(",%llx"), g_ntoskrnlOffsets.ar[i]); _ftprintf(offsetFileStream, TEXT(",%llx"), g_ntoskrnlOffsets.ar[i]);
} }
_fputts(TEXT(""), offsetFileStream); _fputts(TEXT("\n"), offsetFileStream);
fclose(offsetFileStream); fclose(offsetFileStream);
} }
+1 -1
View File
@@ -63,7 +63,7 @@ void SaveWdigestOffsetsToFile(TCHAR* wdigestOffsetFilename) {
for (int i = 0; i < _SUPPORTED_WDIGEST_OFFSETS_END; i++) { for (int i = 0; i < _SUPPORTED_WDIGEST_OFFSETS_END; i++) {
_ftprintf(offsetFileStream, TEXT(",%llx"), g_wdigestOffsets.ar[i]); _ftprintf(offsetFileStream, TEXT(",%llx"), g_wdigestOffsets.ar[i]);
} }
_fputts(TEXT(""), offsetFileStream); _fputts(TEXT("\n"), offsetFileStream);
fclose(offsetFileStream); fclose(offsetFileStream);
} }
+49 -25
View File
@@ -5,17 +5,30 @@ import sys
from requests import get from requests import get
from gzip import decompress from gzip import decompress
from json import loads, dumps from json import loads
import subprocess import subprocess
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor, as_completed
import threading import threading
CSVLock = threading.Lock() CSVLock = threading.Lock()
machineType = dict(x86=332, x64=34404) machineType = dict(x86=332, x64=34404)
knownImageVersions = dict(ntoskrnl=list(), wdigest=list(),ci=list()) knownImageVersions = dict(ntoskrnl=list(), wdigest=list(),ci=list())
extensions_by_mode = dict(ntoskrnl="exe", wdigest="dll",ci="dll") extensions_by_mode = dict(ntoskrnl="exe", wdigest="dll",ci="dll")
def find(key, value):
    """Return the first value stored under *key* anywhere in a nested dict.

    The search is depth-first in insertion order: each entry of *value* is
    checked, and any entry whose value is itself a dict is searched
    recursively before moving on to the next sibling.

    Args:
        key: The dictionary key to look for.
        value: The (possibly nested) dict to search.

    Returns:
        The value associated with the first occurrence of *key*, or None
        when the key is not present at any depth.
    """
    for k, v in value.items():
        if k == key:
            return v
        if isinstance(v, dict):
            found = find(key, v)
            # Only stop when the nested search actually succeeded; otherwise
            # keep scanning the remaining siblings instead of giving up on
            # the first nested dict encountered.
            if found is not None:
                return found
    return None
def printl(s, lock, **kwargs):
    """Print *s* while holding *lock*.

    Args:
        s: The object to print.
        lock: A context manager (e.g. ``threading.Lock``) held for the
            duration of the ``print`` call.
        **kwargs: Forwarded unchanged to the built-in ``print``
            (``end``, ``file``, ``flush``, ...).
    """
    with lock:
        print(s, **kwargs)
def run(args, **kargs): def run(args, **kargs):
"""Wrap subprocess.run to works on Windows and Linux""" """Wrap subprocess.run to works on Windows and Linux"""
# Windows needs shell to be True, to locate binary automatically # Windows needs shell to be True, to locate binary automatically
@@ -23,54 +36,57 @@ def run(args, **kargs):
shell = sys.platform in ["win32"] shell = sys.platform in ["win32"]
return subprocess.run(args, shell=shell, **kargs) return subprocess.run(args, shell=shell, **kargs)
def downloadSpecificFile(entry, pe_basename, pe_ext, knownPEVersions, output_folder): def downloadSpecificFile(entry, pe_basename, pe_ext, knownPEVersions, output_folder, lock):
pe_name = f'{pe_basename}.{pe_ext}' pe_name = f'{pe_basename}.{pe_ext}'
if 'fileInfo' not in entry: if 'fileInfo' not in entry:
# print(f'[!] Entry {pe_hash} has no fileInfo, skipping it.') # printl(f'[!] Entry {pe_hash} has no fileInfo, skipping it.', lock)
return "SKIP" return "SKIP"
if 'timestamp' not in entry['fileInfo']: if 'timestamp' not in entry['fileInfo']:
# print(f'[!] Entry {pe_hash} has no timestamp, skipping it.') # printl(f'[!] Entry has no timestamp, skipping it.', lock)
return "SKIP" return "SKIP"
timestamp = entry['fileInfo']['timestamp'] timestamp = entry['fileInfo']['timestamp']
if 'virtualSize' not in entry['fileInfo']: if 'virtualSize' not in entry['fileInfo']:
# print(f'[!] Entry {pe_hash} has no virtualSize, skipping it.') # printl(f'[!] Entry has no virtualSize, skipping it.', lock)
return "SKIP" return "SKIP"
if "machineType" not in entry["fileInfo"] or entry["fileInfo"]["machineType"] != machineType["x64"]: if "machineType" not in entry["fileInfo"] or entry["fileInfo"]["machineType"] != machineType["x64"]:
# printl('No machine Type', lock)
return "SKIP" return "SKIP"
virtual_size = entry['fileInfo']['virtualSize'] virtual_size = entry['fileInfo']['virtualSize']
file_id = hex(timestamp).replace('0x','').zfill(8).upper() + hex(virtual_size).replace('0x','') file_id = hex(timestamp).replace('0x','').zfill(8).upper() + hex(virtual_size).replace('0x','')
url = 'https://msdl.microsoft.com/download/symbols/' + pe_name + '/' + file_id + '/' + pe_name url = 'https://msdl.microsoft.com/download/symbols/' + pe_name + '/' + file_id + '/' + pe_name
# fix download error, sometimes version does not exist
try: try:
version = entry['fileInfo']['version'].split(' ')[0] version = entry['fileInfo']['version'].split(' ')[0]
except KeyError: except:
print(f"{url} version is unknown.") version = find('version', entry).split(' ')[0]
if not version:
printl(f'[*] Error parsing version', lock)
return "SKIP" return "SKIP"
# Output file format: <PE>_build-revision.<exe | dll> # Output file format: <PE>_build-revision.<exe | dll>
output_version = '-'.join(version.split('.')[-2:]) output_version = '-'.join(version.split('.')[-2:])
output_file = f'{pe_basename}_{output_version}.{pe_ext}' output_file = f'{pe_basename}_{output_version}.{pe_ext}'
# If the PE version is already known, skip download. # If the PE version is already known, skip download.
if output_file in knownPEVersions: if output_file in knownPEVersions:
print(f'[*] Skipping download of known {pe_name} version: {output_file}') printl(f'[*] Skipping download of known {pe_name} version: {output_file}', lock)
return "SKIP" return "SKIP"
output_file_path = os.path.join(output_folder, output_file) output_file_path = os.path.join(output_folder, output_file)
if os.path.isfile(output_file_path): if os.path.isfile(output_file_path):
print(f"[*] Skipping {output_file_path} which already exists") printl(f"[*] Skipping {output_file_path} which already exists", lock)
return "SKIP" return "SKIP"
print(f'[*] Downloading {pe_name} version {version}... ') # printl(f'[*] Downloading {pe_name} version {version}... ', lock)
try: try:
peContent = get(url) peContent = get(url)
with open(output_file_path, 'wb') as f: with open(output_file_path, 'wb') as f:
f.write(peContent.content) f.write(peContent.content)
print(f'[+] Finished download of {pe_name} version {version} (file: {output_file})!') printl(f'[+] Finished download of {pe_name} version {version} (file: {output_file})!', lock)
return "OK" return "OK"
except Exception: except Exception as e:
print(f'[!] ERROR : Could not download {pe_name} version {version} (URL: {url}).') printl(f'[!] ERROR : Could not download {pe_name} version {version} (URL: {url}): {str(e)}.', lock)
return "KO" return "KO"
def downloadPEFileFromMS(pe_basename, pe_ext, knownPEVersions, output_folder): def downloadPEFileFromMS(pe_basename, pe_ext, knownPEVersions, output_folder):
@@ -83,13 +99,16 @@ def downloadPEFileFromMS(pe_basename, pe_ext, knownPEVersions, output_folder):
pe_list = loads(pe_json) pe_list = loads(pe_json)
futures = dict() futures = dict()
i = 0
futures = set()
lock = threading.Lock()
with ThreadPoolExecutor() as executor: with ThreadPoolExecutor() as executor:
for pe_hash in pe_list: for pe_hash in pe_list:
entry = pe_list[pe_hash] entry = pe_list[pe_hash]
futures[pe_hash] = executor.submit(downloadSpecificFile,entry, pe_basename, pe_ext, knownPEVersions, output_folder) futures.add(executor.submit(downloadSpecificFile, entry, pe_basename, pe_ext, knownPEVersions, output_folder, lock))
for (i,f) in enumerate(futures): for future in as_completed(futures):
res = futures[f].result() printl(f"{i + 1}/{len(pe_list)}", lock, end="\r")
print(f"{i+1}/{len(futures)}", end="\r") i += 1
def get_symbol_offset(symbols_info, symbol_name): def get_symbol_offset(symbols_info, symbol_name):
for line in symbols_info: for line in symbols_info:
@@ -155,7 +174,7 @@ def extractOffsets(input_file, output_file, mode):
return return
print(f'[*] Processing {imageType} version {imageVersion} (file: {input_file})') # print(f'[*] Processing {imageType} version {imageVersion} (file: {input_file})')
# download the PDB if needed # download the PDB if needed
r = run(["r2", "-c", "idpd", "-qq", input_file], capture_output=True) r = run(["r2", "-c", "idpd", "-qq", input_file], capture_output=True)
# dump all symbols # dump all symbols
@@ -209,8 +228,8 @@ def extractOffsets(input_file, output_file, mode):
print(f'[*] Processing folder: {input_file}') print(f'[*] Processing folder: {input_file}')
with ThreadPoolExecutor() as extractorPool: with ThreadPoolExecutor() as extractorPool:
args = [(os.path.join(input_file, file), output_file, mode) for file in os.listdir(input_file)] args = [(os.path.join(input_file, file), output_file, mode) for file in os.listdir(input_file)]
for (i,res) in enumerate(extractorPool.map(extractOffsets, *zip(*args))): for (i, res) in enumerate(extractorPool.map(extractOffsets, *zip(*args))):
print(f"{i+1}/{len(args)}", end="\r") print(f"{i + 1}/{len(args)}", end="\r")
print(f'[+] Finished processing of folder {input_file}!') print(f'[+] Finished processing of folder {input_file}!')
else: else:
@@ -252,8 +271,13 @@ if __name__ == '__main__':
print(r.stderr) print(r.stderr)
exit(r.returncode) exit(r.returncode)
output = r.stdout.decode() output = r.stdout.decode()
ma,me,mi = map(int, output.splitlines()[0].split(" ")[0].split(".")) """
if (ma, me, mi) < (5,0,0): can be:
* a series of lines like "5.5.0 r2\n5.5.0 r_lib\n[...]"
* a simple tag "5.8.2-158-gca9763f20d"
"""
ma,me,mi = map(int, output.splitlines()[0].split(" ")[0].split("-")[0].split("."))
if (ma, me, mi) < (5, 0, 0):
print("WARNING : This script has been tested with radare2 5.0.0 (works) and 4.3.1 (does NOT work)") print("WARNING : This script has been tested with radare2 5.0.0 (works) and 4.3.1 (does NOT work)")
print(f"You have version {ma}.{me}.{mi}, if is does not work correctly, meaning most of the offsets are not found (i.e. 0), check radare2's 'idpi' command output and modify get_symbol_offset() & get_field_offset() to parse symbols correctly") print(f"You have version {ma}.{me}.{mi}, if is does not work correctly, meaning most of the offsets are not found (i.e. 0), check radare2's 'idpi' command output and modify get_symbol_offset() & get_field_offset() to parse symbols correctly")
input("Press enter to continue") input("Press enter to continue")