Refactored the extraction script for easier integration of new images/symbols

2026-06-08 16:37:12 +00:00 · 2023-11-29 14:28:17 +01:00
parent 4c2449cfd4
commit eeefd835fe
4 changed files with 43 additions and 23 deletions
@@ -17,8 +17,12 @@ THREADS_LIMIT = None
 CSVLock = threading.Lock()

 machineType = dict(x86=332, x64=34404)
-knownImageVersions = dict(ntoskrnl=list(), wdigest=list(), ci=list())
-extensions_by_mode = dict(ntoskrnl="exe", wdigest="dll", ci="dll")
+supported_images = ["ntoskrnl.exe", "wdigest.dll", "ci.dll"]
+modes = [image_name.split(".")[0] for image_name in supported_images]
+extensions_by_mode = dict(image_name.split(".") for image_name in supported_images)
+known_image_versions = {mode: list() for mode in modes}
+modes_by_imagename = dict(zip(supported_images, modes))
+csvFilenameByMode = {mode: mode.capitalize() + "Offsets.csv" for mode in modes}

 symbols = dict(
    ntoskrnl=[
@@ -44,6 +48,8 @@ symbols = dict(
    ],
 )

+symbols_names = {mode: [t[0] if t[-1] == "symbol" else f"{t[0]}_{t[1]}" for t in symbols[mode]] for mode in modes}
+

 def find(key: str, d: dict):
    for k, v in d.items():
@@ -252,12 +258,10 @@ def extractOffsets(input_file, output_file, mode):
            export_directory_rva = export_directory_entry.VirtualAddress
            image_name_rva = pe.get_dword_at_rva(export_directory_rva + 3 * 4)
            name = pe.get_string_at_rva(image_name_rva).decode().lower()
-            if "ntoskrnl.exe" in name:
-                imageType = "ntoskrnl"
-            elif "wdigest.dll" in name:
-                imageType = "wdigest"
-            elif "ci.dll" in name:
-                imageType = "ci"
+            for image_name in supported_images:
+                if image_name in name:
+                    imageType = modes_by_imagename[image_name]
+                    break
            else:
                print(f"[*] File {input_file} unrecognized")
                return
@@ -274,7 +278,7 @@ def extractOffsets(input_file, output_file, mode):
            extension = extensions_by_mode[imageType]
            imageVersion = f"{imageType}_{full_version[2]}-{full_version[3]}.{extension}"

-            if imageVersion in knownImageVersions[imageType]:
+            if imageVersion in known_image_versions[imageType]:
                print(f"[*] Skipping known {imageType} version {imageVersion} (file: {input_file})")
                try:
                    """
@@ -296,7 +300,7 @@ def extractOffsets(input_file, output_file, mode):
                        for part in input_file_basename[len(f"{imageType}_") : -len(f".{extension}")].split("-")
                    )
                    imageVersion = input_file_basename
-                    if imageVersion in knownImageVersions[imageType]:
+                    if imageVersion in known_image_versions[imageType]:
                        return
                    print("\r", end="")  # Not skipping after all
                except ValueError:
@@ -330,7 +334,7 @@ def extractOffsets(input_file, output_file, mode):

            # print("wrote into CSV !")
            del pdb
-            knownImageVersions[imageType].append(imageVersion)
+            known_image_versions[imageType].append(imageVersion)
            print(f"[+] Finished processing of {imageType} {input_file}!")

        except PEFormatError as e:
@@ -381,22 +385,25 @@ def sortOutputFile(csvFile):
 if __name__ == "__main__":
    parser = argparse.ArgumentParser()

+    modes_str = "/".join(known_image_versions)
+    files = " / ".join(modes_by_imagename)
+    csvfiles = " / ".join(csvFilenameByMode.values())
    parser.add_argument(
        "mode",
-        help='"ntoskrnl", "wdigest" or "ci". Mode to download and extract offsets from either ntoskrnl.exe, wdigest.dll or ci.dll',
+        help=f"{modes_str}. Mode to download and extract offsets from either {files}",
    )
    parser.add_argument(
        "-i",
        "--input",
        dest="input",
        required=True,
-        help="Single file or directory containing ntoskrnl.exe / wdigest.dll / ci.dll to extract offsets from. If in download mode, the PE downloaded from MS symbols servers will be placed in this folder.",
+        help=f"Single file or directory containing {files} to extract offsets from. If in download mode, the PE downloaded from MS symbols servers will be placed in this folder.",
    )
    parser.add_argument(
        "-o",
        "--output",
        dest="output",
-        help="CSV file to write offsets to. If the specified file already exists, only new ntoskrnl versions will be downloaded / analyzed. Defaults to NtoskrnlOffsets.csv / WdigestOffsets.csv / CiOffsets.csv in the current folder.",
+        help=f"CSV file to write offsets to. If the specified file already exists, only new ntoskrnl versions will be downloaded / analyzed. Defaults to {csvfiles} in the current folder.",
    )
    parser.add_argument(
        "-d",
@@ -408,20 +415,20 @@ if __name__ == "__main__":

    args = parser.parse_args()
    mode = args.mode.lower()
-    if mode not in knownImageVersions:
-        print(f'[!] ERROR : unsupported mode "{args.mode}", supported mode are: "ntoskrnl", "wdigest" and "ci"')
+    if mode not in known_image_versions:
+        print(f'[!] ERROR : unsupported mode "{args.mode}", supported mode are: {modes}')
        exit(1)

    # If the output file exists, load the already analyzed image versions.
    # Otherwise, write CSV headers to the new file.
    if not args.output:
-        args.output = mode.capitalize() + "Offsets.csv"
+        args.output = csvFilenameByMode[mode]
    if os.path.isfile(args.output):
-        loadOffsetsFromCSV(knownImageVersions[mode], args.output)
-        print(f'[+] Loaded {len(knownImageVersions[mode])} known {mode} versions from "{args.output}"')
+        loadOffsetsFromCSV(known_image_versions[mode], args.output)
+        print(f'[+] Loaded {len(known_image_versions[mode])} known {mode} versions from "{args.output}"')
    else:
        with open(args.output, "w") as output:
-            output.write(mode + "Version," + ",".join(elem[0] for elem in symbols[mode]) + "\n")
+            output.write(mode + "Version," + ",".join(elem for elem in symbols_names[mode]) + "\n")

    # In download mode, an updated list of image versions published will be retrieved from https://winbindex.m417z.com.
    # The symbols for each version will be downloaded from the Microsoft symbols servers.
@@ -431,7 +438,7 @@ if __name__ == "__main__":
            print("[!] ERROR : in download mode, -i / --input option must specify a folder")
            exit(1)
        extension = extensions_by_mode[mode]
-        downloadPEFileFromMS(mode, extension, knownImageVersions[mode], args.input)
+        downloadPEFileFromMS(mode, extension, known_image_versions[mode], args.input)

    # Extract the offsets from the specified file or the folders containing image files.
    extractOffsets(args.input, args.output, mode)