]> xenbits.xensource.com Git - unikraft/unikraft.git/commitdiff
support/scripts: Add `python3` script to patch fake PE header
author: Sergiu Moga <sergiu.moga@protonmail.com>
Mon, 27 Mar 2023 09:26:10 +0000 (12:26 +0300)
committer: Unikraft <monkey@unikraft.io>
Fri, 11 Aug 2023 10:47:30 +0000 (10:47 +0000)
This script allows patching of the architecture specific fake PE
headers. It only fills in the fields that UEFI firmware looks for
when validating and loading the image. Specifically, it does the
following:
- Write MS-DOS signature in the first bytes of the binary
- Write, at the standard MS-DOS file offset `0x3c`, the file offset of
the beginning of the fake PE header
- Append the original ELF file that also contains the PE header
- Fill in the following fields of the Optional Header: SizeOfCode,
AddressOfEntryPoint, BaseOfCode, SizeOfImage
- Fill in the dummy PE sections, as PE, unlike ELF, is loaded by
sections:
- dummy .reloc section pointing to itself with all fields
zeroed out except the VirtualAddress and PointerToRawData
fields which point to the section itself, to fool UEFI into
thinking this is a valid relocation.
- All PT_LOAD ELF Program Headers will be encapsulated into
PE sections with all permissions enabled (RWX)
For these sections, only the following fields are required to be filled
in: VirtualSize, VirtualAddress, SizeOfRawData, PointerToRawData.

Thus, the script fills in the bare-minimum fields, according to EDKII,
the most complete and official UEFI implementation, that are required
by a UEFI application's PE header to be considered valid and loadable.

Signed-off-by: Sergiu Moga <sergiu.moga@protonmail.com>
Reviewed-by: Michalis Pappas <michalis@unikraft.io>
Approved-by: Razvan Deaconescu <razvand@unikraft.io>
Tested-by: Unikraft CI <monkey@unikraft.io>
GitHub-Closes: #909

plat/common/Makefile.rules
support/scripts/mkefi.py [new file with mode: 0755]

index e52c521029ae0aa3236e9901ff6025da94fc5eef..2eda112ceb279c13adc76019845728430669012f 100644 (file)
@@ -13,3 +13,7 @@ define build_bootinfo =
                $(SCRIPTS_DIR)/mkbootinfo.py $(1) $(1).bootinfo $(BINFO_FLAGS) && \
                $(OBJCOPY) $(1) --update-section .uk_bootinfo=$(1).bootinfo)
 endef
+
+# build_efi: run mkefi.py on the image passed as $(1). The script rewrites
+# that file in place and reads symbol values from $(1).dbg.
+define build_efi =
+       @$(SCRIPTS_DIR)/mkefi.py $(1)
+endef
diff --git a/support/scripts/mkefi.py b/support/scripts/mkefi.py
new file mode 100755 (executable)
index 0000000..939c489
--- /dev/null
@@ -0,0 +1,247 @@
+#!/usr/bin/env python3
+
+import argparse
+import subprocess
+import os
+import re
+from struct import pack
+
+# ELF e_machine values of the architectures this script recognizes
+ELF_MACHINE = {
+    'x86_64': 62,               # EM_X86_64
+    'arm64': 183                # EM_AARCH64
+}
+
+# PE/COFF "Machine" values, keyed by the same architecture names as
+# ELF_MACHINE so that one can be translated into the other
+PE_MACHINE = {
+    'x86_64': 0x8664,           # IMAGE_FILE_MACHINE_AMD64
+    'arm64' : 0xaa64            # IMAGE_FILE_MACHINE_ARM64
+}
+
+# Size in bytes of one serialized PE section header: the field widths used
+# by elf_phdr_to_pe_sec() (8 + 6 * 4 + 2 * 2 + 4) add up to 40
+PE_SEC_SZ = 40
+
+# COFF Characteristics, stored in the COFF File Header "Characteristics"
+# field of MZ_PE_HDR
+COFF_HDR_CHRS = (
+                 0x0002     |  # IMAGE_FILE_EXECUTABLE_IMAGE
+                 0x0004     |  # IMAGE_FILE_LINE_NUMS_STRIPPED
+                 0x0200        # IMAGE_FILE_DEBUG_STRIPPED
+)
+
+# All sections that we obtain from the PT_LOAD Program Headers must be
+# writable, since we have to apply initial relocations. We are going to
+# change permissions ourselves afterwards anyway.
+# These flags are stored in the "Characteristics" field of every section
+# header built by elf_phdr_to_pe_sec().
+PE_SEC_CHRS = (
+                0x00000040 |  # IMAGE_SCN_CNT_INITIALIZED_DATA
+                0x00d00000 |  # IMAGE_SCN_ALIGN_4096BYTES
+                0x20000000 |  # IMAGE_SCN_MEM_EXECUTE
+                0x40000000 |  # IMAGE_SCN_MEM_READ
+                0x80000000    # IMAGE_SCN_MEM_WRITE
+)
+
+# Layout of the MS-DOS stub, PE signature, COFF File Header and Optional
+# Header. Every entry except "SectionHeaders" is a (value, size in bytes)
+# pair. main() serializes the entries little-endian in dictionary order, so
+# the order of the keys below IS the on-disk layout: do not reorder them.
+# Tuples hold fixed values; lists hold placeholders that main() fills in at
+# run time. "SectionHeaders" starts empty and receives one dictionary per
+# PT_LOAD Program Header (see elf_phdr_to_pe_sec()).
+MZ_PE_HDR = {
+    # MS-DOS Stub (Image Only)
+    "MZ_MAGIC"                   : (0x0000000000005a4d, 0x02),  # Offset   0
+    "MZ_PAD"                     : (0x0000000000000000, 0x3a),  # Offset   2
+    "PE_HDR_OFF"                 : (0x0000000000000040, 0x04),  # Offset  60
+
+    # Signature (Image Only)
+    "PE_MAGIC"                   : (0x0000000000004550, 0x04),  # Offset  64
+
+    # COFF File Header (Object and Image)
+    "Machine"                    : [0x0000000000000000, 0x02],  # Offset  68
+    "NumberOfSections"           : [0x0000000000000000, 0x02],  # Offset  70
+    "TimeDateStamp"              : (0x0000000000000000, 0x04),  # Offset  72
+    "PointerToSymbolTable"       : (0x0000000000000000, 0x04),  # Offset  76
+    "NumberOfSymbols"            : (0x0000000000000000, 0x04),  # Offset  80
+    "SizeOfOptionalHeader"       : (0x0000000000000070, 0x02),  # Offset  84
+    "Characteristics"            : (   COFF_HDR_CHRS  , 0x02),  # Offset  86
+
+    # Optional Header Standard Fields (Image Only)
+    "Magic"                      : (0x000000000000020b, 0x02),  # Offset  88
+    "MajorLinkerVersion"         : (0x0000000000000000, 0x01),  # Offset  90
+    "MinorLinkerVersion"         : (0x0000000000000000, 0x01),  # Offset  91
+    "SizeOfCode"                 : [0x0000000000000000, 0x04],  # Offset  92
+    "SizeOfInitializedData"      : [0x0000000000000000, 0x04],  # Offset  96
+    "SizeOfUninitializedData"    : [0x0000000000000000, 0x04],  # Offset 100
+    "AddressOfEntryPoint"        : [0x0000000000000000, 0x04],  # Offset 104
+    "BaseOfCode"                 : [0x0000000000000000, 0x04],  # Offset 108
+
+    # Optional Header Windows-Specific Fields (Image Only)
+    "ImageBase"                  : [0x0000000000000000, 0x08],  # Offset 112
+    "SectionAlignment"           : (0x0000000000001000, 0x04),  # Offset 120
+    "FileAlignment"              : (0x0000000000001000, 0x04),  # Offset 124
+    "MajorOperatingSystemVersion": (0x0000000000000000, 0x02),  # Offset 128
+    "MinorOperatingSystemVersion": (0x0000000000000000, 0x02),  # Offset 130
+    "MajorImageVersion"          : (0x0000000000000000, 0x02),  # Offset 132
+    "MinorImageVersion"          : (0x0000000000000000, 0x02),  # Offset 134
+    "MajorSubsystemVersion"      : (0x0000000000000000, 0x02),  # Offset 136
+    "MinorSubsystemVersion"      : (0x0000000000000000, 0x02),  # Offset 138
+    "Win32VersionValue"          : (0x0000000000000000, 0x04),  # Offset 140
+    "SizeOfImage"                : [0x0000000000000000, 0x04],  # Offset 144
+    "SizeOfHeaders"              : [0x0000000000000000, 0x04],  # Offset 148
+    "CheckSum"                   : (0x0000000000000000, 0x04),  # Offset 152
+    "Subsystem"                  : (0x000000000000000a, 0x02),  # Offset 156
+    "DllCharacteristics"         : (0x0000000000000000, 0x02),  # Offset 158
+    "SizeOfStackReserve"         : (0x0000000000000000, 0x08),  # Offset 160
+    "SizeOfStackCommit"          : (0x0000000000000000, 0x08),  # Offset 168
+    "SizeOfHeapReserve"          : (0x0000000000000000, 0x08),  # Offset 176
+    "SizeOfHeapCommit"           : (0x0000000000000000, 0x08),  # Offset 184
+    "LoaderFlags"                : (0x0000000000000000, 0x04),  # Offset 192
+    "NumberOfRvaAndSizes"        : (0x0000000000000000, 0x04),  # Offset 196
+
+    # Section Table (Section Headers)
+    "SectionHeaders"             : [ ]                          # Offset 200
+}
+
+def elf_phdr_to_pe_sec(base_addr, phdr):
+    """Build the PE section header that wraps one PT_LOAD Program Header.
+
+    MZ_PE_HDR["SizeOfHeaders"] must already hold its final value, because
+    both the section's address and its file offset are shifted by it.
+
+    Args:
+        base_addr: value of the image base address; virtual addresses are
+            made relative to it.
+        phdr: dictionary with the integer keys "Offset", "VirtAddr",
+            "FileSiz" and "MemSiz", as returned by get_loadable_phdrs().
+
+    Returns:
+        A dictionary mapping each section header field name to a
+        (value, size in bytes) pair. The key order is the on-disk field
+        order and must not be changed.
+    """
+    # The original ELF ends up after the PE/COFF headers, both in the file
+    # and in memory, so every offset and address is shifted by their size.
+    hdrs_sz = MZ_PE_HDR["SizeOfHeaders"][0]
+
+    return {
+        # Every field is serialized as a little-endian integer, so build
+        # the integer from the bytes we want on disk. A big-endian looking
+        # constant (0x554B5F5048445200) would be written out reversed, as
+        # "\0RDHP_KU", i.e. a name that starts with a NUL byte.
+        "Name"                : (int.from_bytes(b"UK_PHDR\0", 'little'),
+                                 0x8),
+        "VirtualSize"         : (phdr["MemSiz"], 0x4),
+        "VirtualAddress"      : (phdr["VirtAddr"] - base_addr + hdrs_sz,
+                                 0x4),
+        "SizeOfRawData"       : (phdr["FileSiz"], 0x4),
+        "PointerToRawData"    : (phdr["Offset"] + hdrs_sz, 0x4),
+        "PointerToRelocations": (0x0, 0x4),
+        "PointerToLinenumbers": (0x0, 0x4),
+        "NumberOfRelocations" : (0x0, 0x2),
+        "NumberOfLinenumbers" : (0x0, 0x2),
+        "Characteristics"     : (PE_SEC_CHRS, 0x4),
+    }
+
+# Get the absolute value of symbol, as seen through `nm`
+def get_sym_val(elf, sym):
+    """Return the value of symbol `sym` of `elf`, as printed by `nm`.
+
+    Args:
+        elf: path of the ELF file handed to `nm`.
+        sym: exact symbol name to look up; it is matched literally.
+
+    Returns:
+        The symbol value as an integer.
+
+    Raises:
+        Exception: if `nm` lists no symbol, or more than one symbol, with
+            that exact name.
+        subprocess.CalledProcessError: if `nm` exits with an error.
+    """
+    # Escape the name: symbol names may contain regex metacharacters such
+    # as '.' or '$', which would otherwise match unrelated symbols.
+    exp = r'^\s*([a-f0-9]+)\s+[A-Za-z]\s+' + re.escape(sym) + r'$'
+    out = subprocess.check_output(['nm', elf])
+
+    re_out = re.findall(exp, out.decode('ASCII'), re.MULTILINE)
+    if not re_out:
+        raise Exception('Found no ' + sym + ' symbol.')
+    if len(re_out) > 1:
+        # Picking one of several candidates could silently patch a wrong
+        # address into the header, so refuse to guess.
+        raise Exception('Found ' + str(len(re_out)) + ' symbols named ' +
+                        sym + ', expected exactly one.')
+
+    return int(re_out[0], 16)
+
+# Collect every PT_LOAD Program Header listed by `readelf -l`
+HEXNUM_EXP = r'0x[a-f0-9]+'
+def get_loadable_phdrs(elf):
+    """Return the PT_LOAD Program Headers of `elf` as a list of dictionaries.
+
+    Each dictionary holds the integers "Offset", "VirtAddr", "FileSiz" and
+    "MemSiz", plus "Flags": the permission letters with the blanks removed
+    (e.g. "RE").
+    """
+    captured = r'(' + HEXNUM_EXP + r')'
+    # One entry per column of a LOAD row, separated by optional whitespace
+    # (which may include the line break in the middle of a row). Two hex
+    # columns are matched but not captured.
+    columns = [
+        r'^\s*LOAD',
+        captured,                   # Offset
+        captured,                   # VirtAddr
+        HEXNUM_EXP,                 # skipped column
+        captured,                   # FileSiz
+        captured,                   # MemSiz
+        r'([RWE ]+)',               # Flags
+        HEXNUM_EXP + r'$',          # skipped trailing column
+    ]
+    pattern = r'\s*'.join(columns)
+
+    listing = subprocess.check_output(["readelf", "-l", elf],
+                                      stderr=subprocess.DEVNULL)
+
+    phdrs = []
+    rows = re.findall(pattern, listing.decode('ASCII'), re.MULTILINE)
+    for offset, vaddr, filesz, memsz, flags in rows:
+        phdrs.append({
+            "Offset"    : int(offset, 16),
+            "VirtAddr"  : int(vaddr, 16),
+            "FileSiz"   : int(filesz, 16),
+            "MemSiz"    : int(memsz, 16),
+            "Flags"     : flags.replace(' ', ''),
+        })
+
+    return phdrs
+
+def main():
+    """Turn the ELF64 image named on the command line into a fake PE image.
+
+    The values of `_base_addr`, `__bss_start` and `uk_efi_entry64` are read
+    from `<elf>.dbg` through `nm`, and the PT_LOAD Program Headers are read
+    from `<elf>` through `readelf`. The placeholder fields of MZ_PE_HDR are
+    then filled in, one PE section header is generated per PT_LOAD Program
+    Header, and `<elf>` is rewritten in place: the MZ/PE/COFF headers go to
+    offset 0 and the original ELF64 file is written again at offset
+    SizeOfHeaders.
+
+    Raises:
+        Exception: if a required symbol or any PT_LOAD Program Header cannot
+            be found, or if the ELF machine type is not in ELF_MACHINE.
+    """
+    parser = argparse.ArgumentParser(
+        # Adjacent literals are concatenated verbatim: every line but the
+        # last needs its own trailing space.
+        description='Update the fake PE32 header with the required metadata '
+                    'to be bootable by a UEFI environment and overwrite the '
+                    'ELF64 Header with an empty MS-DOS stub.')
+    parser.add_argument('elf', help='path to ELF64 binary to process')
+    opt = parser.parse_args()
+
+    # We need to operate on the debug image for symbol values. But as far as
+    # ELF sections go, using the final image is enough.
+    elf_dbg = opt.elf + ".dbg"
+
+    # Fetch base address and bss start address from ld script symbols
+    base_addr = get_sym_val(elf_dbg, r'_base_addr')
+    bss_addr = get_sym_val(elf_dbg, r'__bss_start')
+
+    ld_phdrs = get_loadable_phdrs(opt.elf)
+    if not ld_phdrs:
+        # Fail with a clear message instead of an IndexError further down
+        raise Exception('Found no PT_LOAD Program Headers in ' + opt.elf)
+
+    # Sort by end address, so that the last entry is the one that ends
+    # highest in memory
+    ld_phdrs = sorted(ld_phdrs, key=lambda x: x['VirtAddr'] + x['MemSiz'])
+    end_addr = ld_phdrs[-1]["VirtAddr"] + ld_phdrs[-1]["MemSiz"]
+
+    # Again, all addresses relative to the very base of the file, because PE
+    # loading considers the MZ/PE/COFF headers as the first thing loaded.
+    # Use the first function in our EFI stub as the entry point.
+    entry_addr = get_sym_val(elf_dbg, r'uk_efi_entry64')
+
+    # PE is loaded by sections, thus the fake PE Header encodes the PT_LOAD's
+    # as PE sections with all permissions enabled (RWX)
+    with open(opt.elf, 'r+b') as f:
+        elf_file = f.read()
+
+        # Get Machine: e_machine is the 16-bit field at offset 18 of the ELF
+        # header. Decode it as one little-endian integer; adding up the two
+        # bytes would also accept values whose bytes merely sum to a known
+        # machine number.
+        e_machine = int.from_bytes(elf_file[18:20], 'little')
+        for arch, elf_machine in ELF_MACHINE.items():
+            if elf_machine == e_machine:
+                MZ_PE_HDR["Machine"][0] = PE_MACHINE[arch]
+                break
+        else:
+            raise Exception("Unknown architecture. See ELF_MACHINE/PE_MACHINE")
+
+        MZ_PE_HDR["NumberOfSections"][0] = len(ld_phdrs)
+
+        MZ_PE_HDR["ImageBase"][0] = base_addr
+
+        # SizeOfHeaders should be everything in the binary before the actual
+        # loadable PE sections start. So:
+        # 1. Size of all enumerated loadable PE section headers obtained from
+        # loadable ELF segments, plus room for one more section header
+        # (presumably the dummy .reloc one; no such header is written here)
+        hdrs_sz = PE_SEC_SZ * (len(ld_phdrs) + 1)
+        # 2. Size of all PE/COFF headers fields from the specification
+        hdrs_sz += 200  # See MZ_PE_HDR Offset comments
+        # 3. Respect SectionAlignment PE/COFF field: PAGE_ALIGN_DOWN
+        hdrs_sz &= ~0xfff
+        # 4. add + PAGE_SIZE to previous PAGE_ALIGN_DOWN
+        hdrs_sz += 0x1000
+        MZ_PE_HDR["SizeOfHeaders"][0] = hdrs_sz
+
+        # Everything is relative to the image because the whole PE is loaded
+        # into memory including its headers. So, previously, end_addr was the
+        # last relative address of the last loadable PE section. Now update it
+        # to also include the headers that will also be loaded into memory.
+        end_addr += hdrs_sz
+
+        # Same story for entry_addr
+        entry_addr += hdrs_sz
+
+        MZ_PE_HDR["AddressOfEntryPoint"][0] = entry_addr - base_addr
+
+        # NOTE(review): end_addr already includes hdrs_sz at this point, so
+        # the two values below look hdrs_sz larger than strictly needed
+        # (bss_addr is not shifted, and SizeOfImage adds hdrs_sz again).
+        # Kept as is; confirm against the UEFI loader before changing it.
+        bss_sz = end_addr - bss_addr
+        MZ_PE_HDR["SizeOfUninitializedData"][0] = bss_sz
+
+        MZ_PE_HDR["SizeOfImage"][0] = hdrs_sz + (end_addr - base_addr)
+
+        for lp in ld_phdrs:
+            if 'E' in lp["Flags"] and 'R' in lp["Flags"]:
+                MZ_PE_HDR["SizeOfCode"][0] += lp["MemSiz"]
+                # With several executable segments, the one that ends
+                # highest wins, since ld_phdrs is sorted by end address
+                MZ_PE_HDR["BaseOfCode"][0] = (lp["VirtAddr"] - base_addr
+                                              + hdrs_sz)
+            elif 'R' in lp["Flags"]:
+                MZ_PE_HDR["SizeOfInitializedData"][0] += lp["MemSiz"]
+
+        # The loop above also added the virtual size of the loadable segment
+        # containing the .bss section, which is uninitialized.
+        # Therefore, subtract .bss size.
+        MZ_PE_HDR["SizeOfInitializedData"][0] -= bss_sz
+
+        # SizeOfHeaders is final by now, which elf_phdr_to_pe_sec() needs
+        MZ_PE_HDR["SectionHeaders"].extend(
+            elf_phdr_to_pe_sec(base_addr, lp) for lp in ld_phdrs)
+
+        # Write the MS-DOS signature and the rest of the PE/COFF header
+        # fields, in dictionary order, each as a little-endian integer
+        f.seek(0)
+        for name, field in MZ_PE_HDR.items():
+            if name == "SectionHeaders":
+                continue
+            f.write(field[0].to_bytes(field[1], 'little'))
+
+        for sec in MZ_PE_HDR["SectionHeaders"]:
+            for value, size in sec.values():
+                f.write(value.to_bytes(size, 'little'))
+
+        # Go to the end of the PE/COFF headers and append the original ELF64
+        f.seek(hdrs_sz)
+        f.write(elf_file)
+
+# Run main() only when this file is executed as a script
+if __name__ == '__main__':
+    main()