plat/kvm/x86: Make the Unikernel position independent

author Sergiu Moga <sergiu.moga@protonmail.com>

Wed, 22 Mar 2023 18:45:52 +0000 (20:45 +0200)

committer Unikraft <monkey@unikraft.io>

Fri, 11 Aug 2023 08:11:27 +0000 (08:11 +0000)
author Sergiu Moga <sergiu.moga@protonmail.com>
Wed, 22 Mar 2023 18:45:52 +0000 (20:45 +0200)
committer Unikraft <monkey@unikraft.io>
Fri, 11 Aug 2023 08:11:27 +0000 (08:11 +0000)
diff --git a/plat/kvm/Config.uk b/plat/kvm/Config.uk

index 822bb5e9768808e1b8436fa0636aefaa12f37bc5..5ca7e9e30e2c9e16c62b209067b58a0cf6d503d5 100644 (file)
--- a/plat/kvm/Config.uk
+++ b/plat/kvm/Config.uk
@@ -7,7 +7,8 @@ menuconfig PLAT_KVM
         select LIBUKTIMECONV
         select LIBNOLIBC if !HAVE_LIBC
         select LIBFDT if ARCH_ARM_64
-       select LIBUKRELOC if ARCH_ARM_64 && OPTIMIZE_PIE
+       select ELF64_TO_32 if ARCH_X86_64
+       select LIBUKRELOC if OPTIMIZE_PIE
         help
                  Create a Unikraft image that runs as a KVM guest
  
diff --git a/plat/kvm/x86/lcpu_helpers.S b/plat/kvm/x86/lcpu_helpers.S

index 711e5b3a583acebadaa823a1b0cc0cbe0e15d68a..ae4e67072bfbfead1cd6e9b69138da48da59c619 100644 (file)
--- a/plat/kvm/x86/lcpu_helpers.S
+++ b/plat/kvm/x86/lcpu_helpers.S
@@ -56,15 +56,19 @@
  
         fninit
  #if __SSE__
-       jmp 2f
-1:
+       jmp     ldmxcsr_rval_addr + 0x4
+
+ldmxcsr_rval_addr:
         .long   0x1f80          /* Power-on default */
-2:
+
         movq    %cr4, %rdi
         orl     $(X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT), %edi
         movq    %rdi, %cr4
  
-       ldmxcsr (1b)
+       ur_mov  ldmxcsr_rval_addr, %ebx, 4
+
+       ldmxcsr (%ebx)
+
  #endif /* __SSE__ */
  .endm
  
diff --git a/plat/kvm/x86/lcpu_start.S b/plat/kvm/x86/lcpu_start.S

index 6cd8bcbf212dd2eaa42e6c0fac447e83d4d0c0a1..57f1dd10a2b773a03c8dddbba416a0114132bc18 100644 (file)
--- a/plat/kvm/x86/lcpu_start.S
+++ b/plat/kvm/x86/lcpu_start.S
@@ -37,23 +37,49 @@
  #include <uk/config.h>
  #include <uk/asm.h>
  #include <uk/plat/common/lcpu.h>
+#include <uk/reloc.h>
  #include <x86/cpu_defs.h>
  #include <kvm-x86/traps.h>
+#include "lcpu_helpers.S"
  
  #define SEC_BEGIN(x)           .globl x86_##x##_begin; x86_##x##_begin = .;
  #define SEC_END(x)             .globl x86_##x##_end; x86_##x##_end = .;
-#define SEC_RELOC(x, s, addr)  (x - x86_##s##_begin + addr)
  
-#define LOAD_ADDR16            0x8000
-#define RELOC16(x)             SEC_RELOC(x, start16, LOAD_ADDR16)
+#define START16_PLACEHOLDER    0x1516  /* IS16 */
+#define START32_PLACEHOLDER    0x1532  /* IS32 */
  
  .section .data.boot.16
  .globl x86_start16_addr
  x86_start16_addr:
-       .quad   LOAD_ADDR16
+       .quad   START16_PLACEHOLDER
+
+/* Implement dedicated ur_* macros whose only use case is this file, to cope
+ * with the existence of 16-bit code. This is so they do not interfere with the
+ * other uses of the ur_* macros. For example, we do not want symbols for these
+ * to occupy unnecessary space in .uk_reloc.
+ */
+.macro ur_mov_start16  sym:req, reg:req, bytes:req
+       mov     $START16_PLACEHOLDER, \reg
+.globl \sym\()_uk_reloc_imm\bytes\()_start16
+.set   \sym\()_uk_reloc_imm\bytes\()_start16, (. - \bytes)
+       nop
+.endm
+
+.macro ur_data_start16 type:req, sym:req, bytes:req
+.globl \sym\()_uk_reloc_data\bytes\()_start16
+.set   \sym\()_uk_reloc_data\bytes\()_start16, .
+       .\type  START16_PLACEHOLDER
+.endm
  
  /* The following section has to be copied to LOAD_ADDR16 (4KiB max!) at runtime
- * as 16-bit real mode entry point
+ * as 16-bit real mode entry point.
+ * `uk_reloc_{mov|data}` may be used in start16 to cope with the fact that
+ * R_X86_64_16, R_X86_64_32 and R_X86_64_32S relocations are incompatible with
+ * an x86-64 PIE. These uk_reloc's will not appear in the final binary, as
+ * mkukreloc.py knows they are redundant for the early relocator.
+ * Instead, they will be hardcoded in a corresponding locally
+ * declared `struct uk_reloc` residing in the frame of the method that relocates
+ * this very code, during Application Processor environment setup.
   */
  .section .text.boot.16
  SEC_BEGIN(start16)
@@ -64,7 +90,15 @@ ENTRY(lcpu_start16_ap)
         xorl    %edi, %edi
         xorl    %esi, %esi
  
-       jmp     lcpu_start16
+       ur_mov_start16  lcpu_start16, %ax, 2
+       /* On start-up a core's %cs is set depending on the value of the vector
+        * inside the SIPI message, so make sure we are jumping to the
+        * proper address w.r.t. segmentation.
+        */
+       movl    %cs, %ebx
+       shll    $4, %ebx
+       subl    %ebx, %eax
+       jmp     *%eax
  END(lcpu_start16_ap)
  
  /*
@@ -83,7 +117,7 @@ gdt32_null:
         .word   0x0000
  gdt32_ptr:
         .word   (gdt32_end - gdt32 - 1) /* size - 1     */
-       .long   RELOC16(gdt32)          /* GDT address  */
+       ur_data_start16 long, gdt32, 4  /* GDT address  */
  gdt32_cs:
         .quad   GDT_DESC_CODE32_VAL     /* 32-bit CS    */
  gdt32_ds:
@@ -103,8 +137,28 @@ ENTRY(lcpu_start16)
         movl    %eax, %cr0
  
         /* Load 32-bit GDT and jump into 32-bit code segment */
-       lgdt    RELOC16(gdt32_ptr)
-       ljmp    $(gdt32_cs - gdt32), $RELOC16(jump_to32)
+       ur_mov_start16  gdt32_ptr, %ax, 2
+       lgdt    (%eax)
+
+       /* ljmp encoding has 5 bytes, thus 40 bits, which in our case
+        * represent:
+        * [39:32] = 0xea - the identifier opcode of ljmp itself
+        * [31:16] = intersegment 2 byte immediate address to place into %eip
+        * [15: 0] = value to be placed into code segment register
+        * Thus, since the runtime address of jump_to32 is not known at link
+        * time, we will generate a reloc symbol to be resolved.
+        * When we get to this point we place into %eax the address of
+        * jump_to32. For the ljmp to jump to this address we replace the
+        * initial START16_PLACEHOLDER value with the address of jump_to32
+        * by patching at runtime the [31:16] field of ljmp - known to be
+        * at an offset of -4 bytes ([15: 0] + [31:16]) from jump_to32,
+        * since this address corresponds to the very next instruction in
+        * memory after our ljmp.
+        */
+       ur_mov_start16  jump_to32, %ax, 2
+       movw    %ax, -4(%eax)
+       ljmp    $(gdt32_cs - gdt32), $START16_PLACEHOLDER
+
  .code32
  jump_to32:
         /* Set up remaining segment registers */
@@ -117,7 +171,8 @@ jump_to32:
         movl    %eax, %fs
         movl    %eax, %gs
  
-       leal    lcpu_start32, %eax
+       ur_mov_start16  lcpu_start32, %eax, 4
+
         jmp     *%eax
  END(lcpu_start16)
  
@@ -134,6 +189,7 @@ SEC_END(start16)
   * potential platform boot arguments in ESI (e.g., multiboot) if this is the
   * first boot.
   */
+
  .section .data.boot.32
  .align 16
  gdt64:
@@ -146,7 +202,7 @@ gdt64_ds:
  gdt64_end:
  gdt64_ptr:
         .word   gdt64_end - gdt64 - 1
-       .quad   gdt64
+       ur_data quad, gdt64, 8, _phys
  
  #define CR4_BOOT32_SETTINGS                                            \
           X86_CR4_PAE   /* Physical Address Extension */
@@ -173,15 +229,26 @@ ENTRY(lcpu_start32)
         wrmsr
  
         /* Set boot page table and enable paging */
-       movl    $x86_bpt_pml4, %eax
+       ur_mov  x86_bpt_pml4, %eax, 4, _phys
         movl    %eax, %cr3
  
         movl    $CR0_BOOT32_SETTINGS, %eax
         movl    %eax, %cr0
  
         /* Load 64-bit GDT and jump to 64-bit code segment */
-       lgdt    gdt64_ptr
-       ljmp    $(gdt64_cs - gdt64), $jump_to64
+       ur_mov  gdt64_ptr, %eax, 4
+       lgdt    (%eax)
+
+       /* Again, we use the same strategy, only this time we generate an actual
+        * uk_reloc entry to be automatically resolved by the early relocator,
+        * instead of relying on the code that relocates the start16 section
+        * before starting the Application Processors, since execution of
+        * lcpu_start32 comes before that.
+        */
+       ur_mov  jump_to64, %eax, 4
+       movl    %eax, -6(%eax)
+       ljmp    $(gdt64_cs - gdt64), $START32_PLACEHOLDER
+
  .code64
  jump_to64:
         /* Set up remaining segment registers */
@@ -194,7 +261,7 @@ jump_to64:
         movl    %eax, %fs
         movl    %eax, %gs
  
-       leaq    lcpu_start64, %rcx
+       leaq    lcpu_start64(%rip), %rcx
         jmp     *%rcx
  END(lcpu_start32)
  
@@ -213,7 +280,6 @@ END(lcpu_start32)
   *
   * NOTE: Code should be position-independent
   */
-#include "lcpu_helpers.S"
  
  .code64
  .section .text.boot.64
diff --git a/plat/kvm/x86/multiboot.S b/plat/kvm/x86/multiboot.S

index 934cd15223d46deabc8ff27f5345e50859c93534..fabadbe11d347f38608599bb80fddcb193f21357 100644 (file)
--- a/plat/kvm/x86/multiboot.S
+++ b/plat/kvm/x86/multiboot.S
@@ -36,6 +36,7 @@
  
  #include <uk/config.h>
  #include <uk/asm.h>
+#include <uk/reloc.h>
  
  #include <kvm-x86/multiboot.h>
  
@@ -59,8 +60,8 @@ lcpu_bootstack:
  
  .section .rodata
  lcpu_boot_startup_args:
-       .quad   multiboot_entry
-       .quad   lcpu_bootstack
+       ur_data quad, multiboot_entry, 8
+       ur_data quad, lcpu_bootstack, 8
  
  /**
   * 32-bit multiboot entry function
@@ -77,10 +78,18 @@ ENTRY(_multiboot_entry)
         cmpl    $MULTIBOOT_BOOTLOADER_MAGIC, %eax
         jne     no_multiboot
  
-       movl    $lcpu_boot_startup_args, %edi   /* startup args for boot CPU */
+       /* Hardcode the values set up for do_uk_reloc32 for now */
+       movl    $0x00100000, %edi
+       movl    $0x00000000, %esi
+       movl    $0x00100000, %edx
+       do_uk_reloc32   0
+
+       /* startup args for boot CPU */
+       ur_mov  lcpu_boot_startup_args, %edi, 4, _phys
         movl    %ebx, %esi                      /* multiboot info */
  
-       jmp     lcpu_start32
+       ur_mov  lcpu_start32, %ebx, 4, _phys
+       jmp     *%ebx
  
  no_multiboot:
         cli
diff --git a/plat/kvm/x86/multiboot.c b/plat/kvm/x86/multiboot.c

index 30ea74ac4a3db528347e37725c616436b2010c66..f11cf9117aacde912fe3829f62c50566ba580d6d 100644 (file)
--- a/plat/kvm/x86/multiboot.c
+++ b/plat/kvm/x86/multiboot.c
@@ -13,6 +13,7 @@
  #include <uk/plat/common/lcpu.h>
  #include <uk/plat/common/memory.h>
  #include <uk/plat/common/sections.h>
+#include <uk/reloc.h>
  #include <kvm-x86/multiboot.h>
  
  #include <errno.h>
@@ -55,6 +56,12 @@ void multiboot_entry(struct lcpu *lcpu, struct multiboot_info *mi)
         if (unlikely(!bi))
                 multiboot_crash("Incompatible or corrupted bootinfo", -EINVAL);
  
+       /* We have to call this here as the very early do_uk_reloc32 relocator
+        * does not also relocate the UKPLAT_MEMRT_KERNEL mrd's like its C
+        * equivalent, do_uk_reloc, does.
+        */
+       do_uk_reloc_kmrds(0, 0);
+
         /* Add the cmdline */
         if (mi->flags & MULTIBOOT_INFO_CMDLINE) {
                 if (mi->cmdline) {
diff --git a/plat/kvm/x86/pagetable64.S b/plat/kvm/x86/pagetable64.S

index bc3f49ca9fa93cfd5b0edcb2df23d2ae3689a9d5..6923c12074e815190b43122f7e603a0178f7e086 100644 (file)
--- a/plat/kvm/x86/pagetable64.S
+++ b/plat/kvm/x86/pagetable64.S
@@ -33,6 +33,7 @@
  
  #include <uk/config.h>
  #include <uk/arch/paging.h>
+#include <uk/reloc.h>
  
  #define PTE_RO         X86_PTE_PRESENT
  #define PTE_RW         (X86_PTE_PRESENT | X86_PTE_RW)
@@ -139,10 +140,10 @@ x86_bpt_pd0_3: /* 2M pages */
  
  .align 0x1000
  x86_bpt_pdpt0: /* 1G pages */
-       pte      x86_bpt_pd0_0, PTE_RW
-       pte      x86_bpt_pd0_1, PTE_RW
-       pte      x86_bpt_pd0_2, PTE_RW
-       pte      x86_bpt_pd0_3, PTE_RW
+       ur_pte  x86_bpt_pd0_0, PTE_RW
+       ur_pte  x86_bpt_pd0_1, PTE_RW
+       ur_pte  x86_bpt_pd0_2, PTE_RW
+       ur_pte  x86_bpt_pd0_3, PTE_RW
         pte_zero , 0x1fc
  
  /* Page table for 512 GiB direct-mapped physical memory */
@@ -155,10 +156,10 @@ x86_bpt_pdpt511: /* 1G pages */
  .align 0x1000
  .global x86_bpt_pml4
  x86_bpt_pml4:
-       pte      x86_bpt_pdpt0  , PTE_RW
+       ur_pte  x86_bpt_pdpt0, PTE_RW
         pte_zero , 0x1fe
  #ifdef CONFIG_PAGING
-       pte      x86_bpt_pdpt511, PTE_RW
+       ur_pte  x86_bpt_pdpt511, PTE_RW
  #else
         pte_zero , 0x001
  #endif /* CONFIG_PAGING */
diff --git a/support/scripts/mkukreloc.py b/support/scripts/mkukreloc.py

index c955046e0e918822ae373e7925e31eec959d0ddc..38358d7e74cb802a52364438060e25dc48b18fdc 100755 (executable)
--- a/support/scripts/mkukreloc.py
+++ b/support/scripts/mkukreloc.py
@@ -144,13 +144,18 @@ def get_nm_sym_exp(sym):
      )
  
  # Return a dictionary only with the relevant fields described above
+# In the case of x86, we must take care in avoiding the _start16 symbols
+# that are used for the bootstrap code of the Application Processors
+# which will be relocated at runtime during SMP setup only, separately
+# from the early self relocator.
+x86_ignore_sym_substring = '_start16'
  def get_nm_syms(elf, sym):
      nm_sym_exp = get_nm_sym_exp(sym)
      out = subprocess.check_output(["nm", elf])
  
      _nm_syms = re.findall(nm_sym_exp, out.decode('ASCII'), re.MULTILINE)
  
-    return [s for s in _nm_syms]
+    return [s for s in _nm_syms if x86_ignore_sym_substring not in s[1]]
  
  def rela_to_uk_reloc(rela):
      # Offset and Sym. Name + Addend already have the link time address added
author	Sergiu Moga <sergiu.moga@protonmail.com>
	Wed, 22 Mar 2023 18:45:52 +0000 (20:45 +0200)
committer	Unikraft <monkey@unikraft.io>
	Fri, 11 Aug 2023 08:11:27 +0000 (08:11 +0000)
plat/kvm/Config.uk		patch \| blob \| blame \| history
plat/kvm/x86/lcpu_helpers.S		patch \| blob \| blame \| history
plat/kvm/x86/lcpu_start.S		patch \| blob \| blame \| history
plat/kvm/x86/multiboot.S		patch \| blob \| blame \| history
plat/kvm/x86/multiboot.c		patch \| blob \| blame \| history
plat/kvm/x86/pagetable64.S		patch \| blob \| blame \| history
support/scripts/mkukreloc.py		patch \| blob \| blame \| history