Merge branch 'master' into g_CiOptionExtract

2026-06-10 09:27:19 +00:00 · 2023-10-06 10:24:05 +02:00
parent a3966d34b3 a561976b5d
commit 75b0168045
3 changed files with 51 additions and 27 deletions
@@ -65,7 +65,7 @@ void SaveNtoskrnlOffsetsToFile(TCHAR* ntoskrnlOffsetFilename) {
    for (int i = 0; i < _SUPPORTED_NTOSKRNL_OFFSETS_END; i++) {
        _ftprintf(offsetFileStream, TEXT(",%llx"), g_ntoskrnlOffsets.ar[i]);
    }
-    _fputts(TEXT(""), offsetFileStream);
+    _fputts(TEXT("\n"), offsetFileStream);
    fclose(offsetFileStream);
 }
@@ -63,7 +63,7 @@ void SaveWdigestOffsetsToFile(TCHAR* wdigestOffsetFilename) {
    for (int i = 0; i < _SUPPORTED_WDIGEST_OFFSETS_END; i++) {
        _ftprintf(offsetFileStream, TEXT(",%llx"), g_wdigestOffsets.ar[i]);
    }
-    _fputts(TEXT(""), offsetFileStream);
+    _fputts(TEXT("\n"), offsetFileStream);
    fclose(offsetFileStream);
 }
@@ -5,17 +5,30 @@ import sys
 from requests import get
 from gzip import decompress
-from json import loads, dumps
+from json import loads
 import subprocess
-from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures import ThreadPoolExecutor, as_completed
 import threading
 CSVLock = threading.Lock()
 machineType = dict(x86=332, x64=34404)
 knownImageVersions = dict(ntoskrnl=list(), wdigest=list(),ci=list())
 extensions_by_mode = dict(ntoskrnl="exe", wdigest="dll",ci="dll")
 def find(key, value):
    """Return the first value stored under `key` anywhere in the nested dict `value`.

    The search is depth-first, following the iteration order of each dict.
    Only values that are themselves dicts are descended into; lists and
    other containers are not searched.

    Returns None when the key is not present at any level. A value of None
    stored under `key` is therefore indistinguishable from "not found".
    """
    for k, v in value.items():
        if k == key:
            return v
        if isinstance(v, dict):
            found = find(key, v)
            # Only stop when the nested search succeeded; otherwise keep
            # looking through the remaining siblings instead of giving up
            # after the first nested dict.
            if found is not None:
                return found
    return None
 def printl(s, lock, **kwargs):
    """Print `s` while holding `lock`; extra keyword arguments are forwarded to print()."""
    lock.acquire()
    try:
        print(s, **kwargs)
    finally:
        # Always release, even if print() raises.
        lock.release()
 def run(args, **kargs):
    """Wrap subprocess.run to work on Windows and Linux"""
    # Windows needs shell to be True, to locate binary automatically
@@ -23,54 +36,57 @@ def run(args, **kargs):
    shell = sys.platform in ["win32"]
    return subprocess.run(args, shell=shell, **kargs)
-def downloadSpecificFile(entry, pe_basename, pe_ext, knownPEVersions, output_folder):
+def downloadSpecificFile(entry, pe_basename, pe_ext, knownPEVersions, output_folder, lock):
    pe_name = f'{pe_basename}.{pe_ext}'
    if 'fileInfo' not in entry:
-        # print(f'[!] Entry {pe_hash} has no fileInfo, skipping it.')
+        # printl(f'[!] Entry {pe_hash} has no fileInfo, skipping it.', lock)
        return "SKIP"
    if 'timestamp' not in entry['fileInfo']:
-        # print(f'[!] Entry {pe_hash} has no timestamp, skipping it.')
+        # printl(f'[!] Entry has no timestamp, skipping it.', lock)
        return "SKIP"
    timestamp = entry['fileInfo']['timestamp']
    if 'virtualSize' not in entry['fileInfo']:
-        # print(f'[!] Entry {pe_hash} has no virtualSize, skipping it.')
+        # printl(f'[!] Entry has no virtualSize, skipping it.', lock)
        return "SKIP"
    if "machineType" not in entry["fileInfo"] or entry["fileInfo"]["machineType"] != machineType["x64"]:
        # printl('No machine Type', lock)
        return "SKIP"
    virtual_size = entry['fileInfo']['virtualSize']
    file_id = hex(timestamp).replace('0x','').zfill(8).upper() + hex(virtual_size).replace('0x','')
    url = 'https://msdl.microsoft.com/download/symbols/' + pe_name + '/' + file_id + '/' + pe_name
    # fix download error, sometimes version does not exist
    try:
        version = entry['fileInfo']['version'].split(' ')[0]
-    except KeyError:
+    except:
-        print(f"{url} version is unknown.")
+        version = find('version', entry).split(' ')[0]
    if not version:
        printl(f'[*] Error parsing version', lock)
        return "SKIP"
-    
+
    # Output file format: <PE>_build-revision.<exe | dll>
    output_version = '-'.join(version.split('.')[-2:])
    output_file = f'{pe_basename}_{output_version}.{pe_ext}'
    # If the PE version is already known, skip download.
    if output_file in knownPEVersions:
-        print(f'[*] Skipping download of known {pe_name} version: {output_file}')
+        printl(f'[*] Skipping download of known {pe_name} version: {output_file}', lock)
        return "SKIP"
    output_file_path = os.path.join(output_folder, output_file)
    if os.path.isfile(output_file_path):
-        print(f"[*] Skipping {output_file_path} which already exists")
+        printl(f"[*] Skipping {output_file_path} which already exists", lock)
        return "SKIP"
-    print(f'[*] Downloading {pe_name} version {version}... ')
+    # printl(f'[*] Downloading {pe_name} version {version}... ', lock)
    try:
        peContent = get(url)
        with open(output_file_path, 'wb') as f:
            f.write(peContent.content)
-        print(f'[+] Finished download of {pe_name} version {version} (file: {output_file})!')
+        printl(f'[+] Finished download of {pe_name} version {version} (file: {output_file})!', lock)
        return "OK"
-    except Exception:
+    except Exception as e:
-        print(f'[!] ERROR : Could not download {pe_name} version {version} (URL: {url}).')
+        printl(f'[!] ERROR : Could not download {pe_name} version {version} (URL: {url}): {str(e)}.', lock)
        return "KO"
 def downloadPEFileFromMS(pe_basename, pe_ext, knownPEVersions, output_folder):
@@ -83,13 +99,16 @@ def downloadPEFileFromMS(pe_basename, pe_ext, knownPEVersions, output_folder):
    pe_list = loads(pe_json)
    futures = dict()
    i = 0
    futures = set()
    lock = threading.Lock()
    with ThreadPoolExecutor() as executor:
        for pe_hash in pe_list:
            entry = pe_list[pe_hash]
-            futures[pe_hash] = executor.submit(downloadSpecificFile,entry, pe_basename, pe_ext, knownPEVersions, output_folder)
+            futures.add(executor.submit(downloadSpecificFile, entry, pe_basename, pe_ext, knownPEVersions, output_folder, lock))
-    for (i,f) in enumerate(futures):
+        for future in as_completed(futures):
-        res = futures[f].result()
+            printl(f"{i + 1}/{len(pe_list)}", lock, end="\r")
-        print(f"{i+1}/{len(futures)}", end="\r")
+            i += 1
 def get_symbol_offset(symbols_info, symbol_name):
    for line in symbols_info:
@@ -155,7 +174,7 @@ def extractOffsets(input_file, output_file, mode):
                return
-            print(f'[*] Processing {imageType} version {imageVersion} (file: {input_file})')
+            # print(f'[*] Processing {imageType} version {imageVersion} (file: {input_file})')
            # download the PDB if needed
            r = run(["r2", "-c", "idpd", "-qq", input_file], capture_output=True)
            # dump all symbols
@@ -209,8 +228,8 @@ def extractOffsets(input_file, output_file, mode):
        print(f'[*] Processing folder: {input_file}')
        with ThreadPoolExecutor() as extractorPool:
            args = [(os.path.join(input_file, file), output_file, mode) for file in os.listdir(input_file)]
-            for (i,res) in enumerate(extractorPool.map(extractOffsets, *zip(*args))):
+            for (i, res) in enumerate(extractorPool.map(extractOffsets, *zip(*args))):
-                print(f"{i+1}/{len(args)}", end="\r")
+                print(f"{i + 1}/{len(args)}", end="\r")
        print(f'[+] Finished processing of folder {input_file}!')
    else:
@@ -252,8 +271,13 @@ if __name__ == '__main__':
        print(r.stderr)
        exit(r.returncode)
    output = r.stdout.decode()
-    ma,me,mi = map(int, output.splitlines()[0].split(" ")[0].split("."))
+    """
-    if (ma, me, mi) < (5,0,0):
+    can be:
     * a series of lines like "5.5.0  r2\n5.5.0  r_lib\n[...]"
     * a simple tag "5.8.2-158-gca9763f20d"
    """
    ma,me,mi = map(int, output.splitlines()[0].split(" ")[0].split("-")[0].split("."))
    if (ma, me, mi) < (5, 0, 0):
        print("WARNING : This script has been tested with radare2 5.0.0 (works) and 4.3.1 (does NOT work)")
        print(f"You have version {ma}.{me}.{mi}, if is does not work correctly, meaning most of the offsets are not found (i.e. 0), check radare2's 'idpi' command output and modify get_symbol_offset() & get_field_offset() to parse symbols correctly")
        input("Press enter to continue")