Merge branch 'master' into g_CiOptionExtract

This commit is contained in:
Maxime Meignan
2023-10-06 10:24:05 +02:00
committed by GitHub
3 changed files with 51 additions and 27 deletions
+49 -25
View File
@@ -5,17 +5,30 @@ import sys
from requests import get
from gzip import decompress
from json import loads, dumps
from json import loads
import subprocess
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
CSVLock = threading.Lock()
machineType = dict(x86=332, x64=34404)
knownImageVersions = dict(ntoskrnl=list(), wdigest=list(),ci=list())
extensions_by_mode = dict(ntoskrnl="exe", wdigest="dll",ci="dll")
def find(key, value):
for k, v in value.items():
if k == key:
return v
elif isinstance(v, dict):
return find(key, v)
return None
def printl(s, lock, **kwargs):
with lock:
print(s, **kwargs)
def run(args, **kargs):
"""Wrap subprocess.run to works on Windows and Linux"""
# Windows needs shell to be True, to locate binary automatically
@@ -23,54 +36,57 @@ def run(args, **kargs):
shell = sys.platform in ["win32"]
return subprocess.run(args, shell=shell, **kargs)
def downloadSpecificFile(entry, pe_basename, pe_ext, knownPEVersions, output_folder):
def downloadSpecificFile(entry, pe_basename, pe_ext, knownPEVersions, output_folder, lock):
pe_name = f'{pe_basename}.{pe_ext}'
if 'fileInfo' not in entry:
# print(f'[!] Entry {pe_hash} has no fileInfo, skipping it.')
# printl(f'[!] Entry {pe_hash} has no fileInfo, skipping it.', lock)
return "SKIP"
if 'timestamp' not in entry['fileInfo']:
# print(f'[!] Entry {pe_hash} has no timestamp, skipping it.')
# printl(f'[!] Entry has no timestamp, skipping it.', lock)
return "SKIP"
timestamp = entry['fileInfo']['timestamp']
if 'virtualSize' not in entry['fileInfo']:
# print(f'[!] Entry {pe_hash} has no virtualSize, skipping it.')
# printl(f'[!] Entry has no virtualSize, skipping it.', lock)
return "SKIP"
if "machineType" not in entry["fileInfo"] or entry["fileInfo"]["machineType"] != machineType["x64"]:
# printl('No machine Type', lock)
return "SKIP"
virtual_size = entry['fileInfo']['virtualSize']
file_id = hex(timestamp).replace('0x','').zfill(8).upper() + hex(virtual_size).replace('0x','')
url = 'https://msdl.microsoft.com/download/symbols/' + pe_name + '/' + file_id + '/' + pe_name
# fix download error, sometimes version does not exist
try:
version = entry['fileInfo']['version'].split(' ')[0]
except KeyError:
print(f"{url} version is unknown.")
except:
version = find('version', entry).split(' ')[0]
if not version:
printl(f'[*] Error parsing version', lock)
return "SKIP"
# Output file format: <PE>_build-revision.<exe | dll>
output_version = '-'.join(version.split('.')[-2:])
output_file = f'{pe_basename}_{output_version}.{pe_ext}'
# If the PE version is already known, skip download.
if output_file in knownPEVersions:
print(f'[*] Skipping download of known {pe_name} version: {output_file}')
printl(f'[*] Skipping download of known {pe_name} version: {output_file}', lock)
return "SKIP"
output_file_path = os.path.join(output_folder, output_file)
if os.path.isfile(output_file_path):
print(f"[*] Skipping {output_file_path} which already exists")
printl(f"[*] Skipping {output_file_path} which already exists", lock)
return "SKIP"
print(f'[*] Downloading {pe_name} version {version}... ')
# printl(f'[*] Downloading {pe_name} version {version}... ', lock)
try:
peContent = get(url)
with open(output_file_path, 'wb') as f:
f.write(peContent.content)
print(f'[+] Finished download of {pe_name} version {version} (file: {output_file})!')
printl(f'[+] Finished download of {pe_name} version {version} (file: {output_file})!', lock)
return "OK"
except Exception:
print(f'[!] ERROR : Could not download {pe_name} version {version} (URL: {url}).')
except Exception as e:
printl(f'[!] ERROR : Could not download {pe_name} version {version} (URL: {url}): {str(e)}.', lock)
return "KO"
def downloadPEFileFromMS(pe_basename, pe_ext, knownPEVersions, output_folder):
@@ -83,13 +99,16 @@ def downloadPEFileFromMS(pe_basename, pe_ext, knownPEVersions, output_folder):
pe_list = loads(pe_json)
futures = dict()
i = 0
futures = set()
lock = threading.Lock()
with ThreadPoolExecutor() as executor:
for pe_hash in pe_list:
entry = pe_list[pe_hash]
futures[pe_hash] = executor.submit(downloadSpecificFile,entry, pe_basename, pe_ext, knownPEVersions, output_folder)
for (i,f) in enumerate(futures):
res = futures[f].result()
print(f"{i+1}/{len(futures)}", end="\r")
futures.add(executor.submit(downloadSpecificFile, entry, pe_basename, pe_ext, knownPEVersions, output_folder, lock))
for future in as_completed(futures):
printl(f"{i + 1}/{len(pe_list)}", lock, end="\r")
i += 1
def get_symbol_offset(symbols_info, symbol_name):
for line in symbols_info:
@@ -155,7 +174,7 @@ def extractOffsets(input_file, output_file, mode):
return
print(f'[*] Processing {imageType} version {imageVersion} (file: {input_file})')
# print(f'[*] Processing {imageType} version {imageVersion} (file: {input_file})')
# download the PDB if needed
r = run(["r2", "-c", "idpd", "-qq", input_file], capture_output=True)
# dump all symbols
@@ -209,8 +228,8 @@ def extractOffsets(input_file, output_file, mode):
print(f'[*] Processing folder: {input_file}')
with ThreadPoolExecutor() as extractorPool:
args = [(os.path.join(input_file, file), output_file, mode) for file in os.listdir(input_file)]
for (i,res) in enumerate(extractorPool.map(extractOffsets, *zip(*args))):
print(f"{i+1}/{len(args)}", end="\r")
for (i, res) in enumerate(extractorPool.map(extractOffsets, *zip(*args))):
print(f"{i + 1}/{len(args)}", end="\r")
print(f'[+] Finished processing of folder {input_file}!')
else:
@@ -252,8 +271,13 @@ if __name__ == '__main__':
print(r.stderr)
exit(r.returncode)
output = r.stdout.decode()
ma,me,mi = map(int, output.splitlines()[0].split(" ")[0].split("."))
if (ma, me, mi) < (5,0,0):
"""
can be:
* a series of lines like "5.5.0 r2\n5.5.0 r_lib\n[...]"
* a simple tag "5.8.2-158-gca9763f20d"
"""
ma,me,mi = map(int, output.splitlines()[0].split(" ")[0].split("-")[0].split("."))
if (ma, me, mi) < (5, 0, 0):
print("WARNING : This script has been tested with radare2 5.0.0 (works) and 4.3.1 (does NOT work)")
print(f"You have version {ma}.{me}.{mi}, if is does not work correctly, meaning most of the offsets are not found (i.e. 0), check radare2's 'idpi' command output and modify get_symbol_offset() & get_field_offset() to parse symbols correctly")
input("Press enter to continue")