From: Andrew Cooper Date: Thu, 26 May 2016 16:37:30 +0000 (+0100) Subject: Pagetable Emulation testing X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=refs%2Fheads%2Fpagetable-emulation;p=people%2Fandrewcoop%2Fxen-test-framework.git Pagetable Emulation testing Signed-off-by: Andrew Cooper --- diff --git a/Doxyfile b/Doxyfile index f63d04f..3427804 100644 --- a/Doxyfile +++ b/Doxyfile @@ -2262,6 +2262,7 @@ PREDEFINED = __alias(x)= \ __maybe_unused \ __noinline \ __noreturn \ + __transparent \ __used \ __user_data \ __user_page_aligned_bss \ diff --git a/arch/x86/include/arch/cpuid.h b/arch/x86/include/arch/cpuid.h index e6d6a8c..ebe21d1 100644 --- a/arch/x86/include/arch/cpuid.h +++ b/arch/x86/include/arch/cpuid.h @@ -97,6 +97,7 @@ static inline bool cpu_has(unsigned int feature) #define cpu_has_umip cpu_has(X86_FEATURE_UMIP) #define cpu_has_pku cpu_has(X86_FEATURE_PKU) +#define cpu_has_pks cpu_has(X86_FEATURE_PKS) #endif /* XTF_X86_CPUID_H */ diff --git a/arch/x86/include/arch/lib.h b/arch/x86/include/arch/lib.h index 0a9b23d..dea7f2e 100644 --- a/arch/x86/include/arch/lib.h +++ b/arch/x86/include/arch/lib.h @@ -390,9 +390,35 @@ static inline void clflush(const void *ptr) asm volatile ("clflush %0" :: "m" (*(const char *)ptr)); } -static inline void flush_tlb(void) +static inline uint64_t rdpkru(void) { - write_cr3(read_cr3()); + uint32_t low, high; + + asm volatile (".byte 0x0f, 0x01, 0xee;" + : "=a" (low), "=d" (high) : "c" (0)); + + return ((uint64_t)high << 32) | low; +} + +static inline void wrpkru(uint64_t pkru) +{ + asm volatile (".byte 0x0f, 0x01, 0xef;" + : + : "a" ((uint32_t)pkru), "d" ((uint32_t)(pkru >> 32)), + "c" (0)); +} + +static inline void flush_tlb(bool global_mappings) +{ + unsigned long cr4; + + if ( global_mappings && ((cr4 = read_cr4()) & X86_CR4_PGE) ) + { + write_cr4(cr4 & ~X86_CR4_PGE); + write_cr4(cr4); + } + else + write_cr3(read_cr3()); } #endif /* XTF_X86_LIB_H */ diff --git a/arch/x86/include/arch/msr-index.h b/arch/x86/include/arch/msr-index.h index 0495c3f..c237c07 100644 --- a/arch/x86/include/arch/msr-index.h +++ b/arch/x86/include/arch/msr-index.h @@ -44,6 +44,8 @@ #define MSR_A_PMC(n) (0x000004c1 + (n)) +#define MSR_PKRS 0x000006e1 + #define MSR_X2APIC_REGS 0x00000800 #define MSR_EFER 0xc0000080 /* Extended Feature Enable Register */ diff --git a/arch/x86/include/arch/processor.h b/arch/x86/include/arch/processor.h index 1c7e655..c2785c8 100644 --- a/arch/x86/include/arch/processor.h +++ b/arch/x86/include/arch/processor.h @@ -60,6 +60,8 @@ #define X86_CR4_OSXSAVE 0x00040000 /* XSAVE/XRSTOR */ #define X86_CR4_SMEP 0x00100000 /* SMEP */ #define X86_CR4_SMAP 0x00200000 /* SMAP */ +#define X86_CR4_PKE 0x00400000 /* Protection Key Enable (user) */ +#define X86_CR4_PKS 0x01000000 /* Protection Key Supervisor */ /* * CPU features in XCR0. diff --git a/arch/x86/include/arch/xtf.h b/arch/x86/include/arch/xtf.h index da75ac8..c35b085 100644 --- a/arch/x86/include/arch/xtf.h +++ b/arch/x86/include/arch/xtf.h @@ -3,6 +3,7 @@ #include #include +#include #include #include #include diff --git a/docs/all-tests.dox b/docs/all-tests.dox index 892a9e4..cd8a566 100644 --- a/docs/all-tests.dox +++ b/docs/all-tests.dox @@ -32,6 +32,8 @@ and functionality. @subpage test-nmi-taskswitch-priv - Task Gate handling of interrupts. +@subpage test-pagetable-emulation + @subpage test-pv-fsgsbase - FSGSBASE behaviour for PV guests. @subpage test-pv-iopl - IOPL emulation for PV guests. 
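(Aside, a sketch rather than part of the patch: the rdpkru()/wrpkru() helpers added to lib.h above are encoded as raw opcode bytes because older assemblers lack the mnemonics. Architecturally, PKRU and the PKRS MSR hold two bits per protection key, Access-Disable at bit 2*key and Write-Disable at bit 2*key+1; pkr_mask() below is an illustrative name only.

    #include <stdbool.h>
    #include <stdint.h>

    /* Per-key permission mask: AD lives at bit 2*key, WD at bit 2*key + 1. */
    static inline uint32_t pkr_mask(unsigned int key, bool ad, bool wd)
    {
        return ((ad ? 1u : 0u) | (wd ? 2u : 0u)) << (2 * key);
    }

    /*
     * The test tags its PTEs with protection key 15 (0xf << 59), so:
     *   pkr_mask(15, true, false) == 0x40000000  (PKR_AD_VAL in main.c)
     *   pkr_mask(15, false, true) == 0x80000000  (PKR_WD_VAL in main.c)
     */

Relatedly, flush_tlb(true) toggles CR4.PGE instead of reloading CR3: a CR3 write leaves global TLB entries intact, whereas clearing and restoring PGE flushes the whole TLB, global entries included.)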
diff --git a/include/xen/arch-x86/cpufeatureset.h b/include/xen/arch-x86/cpufeatureset.h index f66a4ab..df56b22 100644 --- a/include/xen/arch-x86/cpufeatureset.h +++ b/include/xen/arch-x86/cpufeatureset.h @@ -147,6 +147,7 @@ #define X86_FEATURE_UMIP (6*32+ 2) /* User-Mode Instruction Prevention */ #define X86_FEATURE_PKU (6*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (6*32+ 4) /* OS Protection Keys Enable */ +#define X86_FEATURE_PKS (6*32+31) /* Protection Keys for Supervisor */ /* AMD-defined CPU features, CPUID level 0x80000007.edx, word 7 */ #define X86_FEATURE_ITSC (7*32+ 8) /* Invariant TSC */ diff --git a/include/xtf/compiler.h b/include/xtf/compiler.h index 19d9349..1c24d9b 100644 --- a/include/xtf/compiler.h +++ b/include/xtf/compiler.h @@ -11,6 +11,7 @@ #define __packed __attribute__((__packed__)) #define __printf(f, v) __attribute__((__format__(__printf__, f, v))) #define __maybe_unused __attribute__((__unused__)) +#define __transparent __attribute__((__transparent_union__)) #define __used __attribute__((__used__)) #define __weak __attribute__((__weak__)) diff --git a/tests/pagetable-emulation/Makefile b/tests/pagetable-emulation/Makefile new file mode 100644 index 0000000..01a2769 --- /dev/null +++ b/tests/pagetable-emulation/Makefile @@ -0,0 +1,11 @@ +include $(ROOT)/build/common.mk + +NAME := pagetable-emulation +CATEGORY := functional +TEST-ENVS := $(HVM_ENVIRONMENTS) + +VARY-CFG := hap shadow + +obj-perenv += stubs.o main.o + +include $(ROOT)/build/gen.mk diff --git a/tests/pagetable-emulation/main.c b/tests/pagetable-emulation/main.c new file mode 100644 index 0000000..77e1668 --- /dev/null +++ b/tests/pagetable-emulation/main.c @@ -0,0 +1,1190 @@ +/** + * @file tests/pagetable-emulation/main.c + * @ref test-pagetable-emulation + * + * @page test-pagetable-emulation Pagetable and pagewalk emulation + * + * This test aims to be a comprehensive test of all interesting bits in + * pagetable entries, in each paging mode. + * + * The purpose of this test is to compare the behaviour of real hardware + * against Xen's emulated pagewalk. + * + * There are several types of memory access in the x86 ISA: + * + * 1. Data read and write. + * + * Data accesses are tested with a single-byte read or write (of the value + * 0xc3) of the linear address under test, with an optional force emulate + * prefix before the mov instruction. + * + * 2. Instruction fetch. + * + * Fetches act similarly to data reads, but have different control + * settings which affect whether an access succeeds or fails. We `call` + * the page boundary under test, with the expectation of hitting the `ret` + * instruction placed there. + * + * To correctly test an emulated fetch at the boundary, the force emulate + * prefix needs to execute successfully, and cause the resulting `%%rip` + * update to cross the page boundary. To allow the FEP to execute + * successfully, it must not share any pagetable entry with the + * linear address under test, meaning that the page boundary under test + * must be between two entries in the top-level pagetable. + * + * 3. Implicit data access. + * + * An implicit access occurs as a side effect of an instruction which + * references a descriptor table. This test uses `mov sel, %%fs` to + * trigger an implicit LDT access, with an optional force emulation prefix + * before the mov instruction. @todo Figure out how to sensibly test + * implicit writes which need to set the descriptor Accessed bit. This + * ends up being two separate memory accesses with different + * writeabilities, which will hit the same linear address under test. + * + * Overall, the following in-memory gadget is used: + * + * _XEN_ASM_FEP LDT[0] with `ret` at the start + * +-------------------+----------------------------+ + * | -- 0f 0b 78 65 6e | c3 ff 00 00 00 f3 cf 00 -- | + * +-------------------+----------------------------+ + * ^ page boundary under test + * + * which allows us to construct all the memory accesses we would like to test. + * + * The primary paging mode comes from the test environment, which generates + * some conditional behaviour based on `CONFIG_PAGING_LEVELS`. The test + * includes unpaged mode, because while the guest logically has `%%cr0.pg` + * clear, the processor actually has paging enabled and Xen has some juggling + * to do behind the scenes to cause correct behaviour to occur. + * + * Various observations about pagewalks: + * + * 1. Intel and AMD differ on the behaviour of `L4e.GLOBAL`. Intel ignore the + * bit, while AMD treat it as strictly reserved. + * + * 2. Hardware does distinguish between instruction and data accesses, even if + * `EFER.NXE` or `CR4.SMEP` are clear, and the I/D flag isn't reported in + * the @#PF error code. + * + * Outstanding issues: + * + * 1. AMD EPYC erratum #1053 + * + * Intel's and AMD's implementations of SMAP differ, with AMD's behaviour + * being better from a hypervisor's point of view. AMD have issued an + * erratum, but the model in this test doesn't adequately cope yet. + * + * @see tests/pagetable-emulation/main.c + */ +#include <xtf.h> + +#include "stubs.h" + +const char test_title[] = "Test pagetable-emulation"; + +#define LDT_SEL 0x0007 /**< Entry 0, LDT, RPL3 */ + +#define X86_PFEC_IMP (1u << 16) + +#if CONFIG_PAGING_LEVELS == 0 +# define PKR_WD_VAL 0xaaaaaaaa +# define PKR_AD_VAL 0x55555555 +#else +# define PKR_WD_VAL 0x80000000 +# define PKR_AD_VAL 0x40000000 +#endif + +typedef union +{ + unsigned int raw; + struct { + bool wp:1; + bool ac:1; + bool nx:1; + bool smep:1; + bool smap:1; + bool pke:1; + bool pkru_wd:1; + bool pkru_ad:1; + bool pks:1; + bool pkrs_wd:1; + bool pkrs_ad:1; + + bool max:1; /* Must be last; upper boundary. */ + }; +} __transparent pg_ctrl_t; + + +static bool shadow_paging; +static uint64_t pa_rsvd; /**< Bits which are reserved due to maxphysaddr. */ +static paddr_t paddrs[2]; /**< Physical addresses to test. */ + +/** Number of individual tests performed. Purely informative.
*/ +static unsigned long nr_tests; + +static struct test_printinfo { + unsigned long va; + bool active; + bool user; + bool force; + const char *desc; + pg_ctrl_t ctrl; + + intpte_t pteval; + bool pte_printed; +} under_test; + +static const struct stubs +{ + unsigned long (*read) (unsigned long va); + unsigned long (*implicit) (unsigned long sel); + unsigned long (*write) (unsigned long va); + unsigned long (*exec) (unsigned long va); + unsigned long (*read_user) (unsigned long va); + unsigned long (*implicit_user) (unsigned long sel); + unsigned long (*write_user) (unsigned long va); + unsigned long (*exec_user) (unsigned long va); +} native_stubs = { + .read = stub_read, + .implicit = stub_implicit, + .write = stub_write, + .exec = stub_exec, + .read_user = stub_read_user, + .implicit_user = stub_implicit_user, + .write_user = stub_write_user, + .exec_user = stub_exec_user, +}, emul_stubs = { + .read = stub_force_read, + .implicit = stub_force_implicit, + .write = stub_force_write, + .exec = stub_force_exec, + .read_user = stub_force_read_user, + .implicit_user = stub_force_implicit_user, + .write_user = stub_force_write_user, + .exec_user = stub_force_exec_user, +}; + +struct mapping_info +{ + unsigned int level, order; + void *va; + intpte_t *pte, *fe_pte; + uint64_t paddr; + bool leaf; + + union + { + intpte_t *ptes[4]; + struct + { + intpte_t *l1e, *l2e, *l3e, *l4e; + }; + }; +}; + +/** + * Clear all Access and Dirty bits on the PTEs making up the linear address + * under test, and shoot the mapping down. + */ +static void clear_ad(struct mapping_info *m) +{ + unsigned int i; + + for ( i = 0; i < ARRAY_SIZE(m->ptes); ++i ) + if ( m->ptes[i] ) + *m->ptes[i] &= ~_PAGE_AD; + + invlpg(m->va); +} + +bool ex_check_pf(struct cpu_regs *regs, + const struct extable_entry *ex) +{ + if ( regs->entry_vector == X86_EXC_PF ) + { + unsigned long cr2 = read_cr2(); + + if ( cr2 != under_test.va ) + xtf_failure("Bad %%cr2: expected %p, got %p\n", + _p(under_test.va), _p(cr2)); + + regs->ax = EXINFO_SYM(PF, regs->error_code); + + if ( ex->fixup ) + regs->ip = ex->fixup; + else + { + if ( cpu_has_smap ) + asm volatile ("stac"); + + regs->ip = *(unsigned long *)cpu_regs_sp(regs); + } + + return true; + } + + return false; +} + +static pg_ctrl_t get_current_pg_ctrl(void) +{ + pg_ctrl_t ctrl = {}; + unsigned long cr4 = read_cr4(); + + ctrl.wp = read_cr0() & X86_CR0_WP; + ctrl.ac = read_flags() & X86_EFLAGS_AC; + ctrl.nx = rdmsr(MSR_EFER) & EFER_NXE; + ctrl.smep = cr4 & X86_CR4_SMEP; + ctrl.smap = cr4 & X86_CR4_SMAP; + ctrl.pke = cr4 & X86_CR4_PKE; + ctrl.pks = cr4 & X86_CR4_PKS; + + if ( cpu_has_pku ) + { + if ( !ctrl.pke ) + write_cr4(cr4 | X86_CR4_PKE); + + unsigned int pkru = rdpkru(); + + ctrl.pkru_wd = pkru & PKR_WD_VAL; + ctrl.pkru_ad = pkru & PKR_AD_VAL; + + if ( !ctrl.pke ) + write_cr4(cr4); + } + + if ( cpu_has_pks ) + { + uint32_t pkrs = rdmsr(MSR_PKRS); + + ctrl.pkrs_wd = pkrs & PKR_WD_VAL; + ctrl.pkrs_ad = pkrs & PKR_AD_VAL; + } + + return ctrl; +} + +static void __printf(1, 2) fail(const char *fmt, ...) +{ + va_list args; + + if ( !under_test.active ) + return; + + if ( !under_test.pte_printed ) + { + intpte_t pte = under_test.pteval; + printk(" PTE: %"PRIpte" =>%s%s%s%s%s%s%s%s%s\n", pte, + pte & _PAGE_NX ? " Nx" : "", + pte & 0x7800000000000000ull ? " Pk" : "", + pte & 0x07f0000000000000ull ? " Av" : "", + pte & pa_rsvd ? " Rs" : "", + pte & _PAGE_GLOBAL ? " G" : "", + pte & _PAGE_PSE ? " +" : "", + pte & _PAGE_USER ? " U" : "", + pte & _PAGE_RW ? " W" : "", + pte & _PAGE_PRESENT ? 
" P" : ""); + + pg_ctrl_t ctrl = under_test.ctrl; + printk(" CTRL:%s%s%s%s%s%s%s%s%s%s%s\n", + ctrl.pkrs_ad ? " PKSAD" : "", + ctrl.pkrs_wd ? " PKSWD" : "", + ctrl.pks ? " PKS" : "", + ctrl.pkru_ad ? " PKUAD" : "", + ctrl.pkru_wd ? " PKUWD" : "", + ctrl.pke ? " PKE" : "", + ctrl.smap ? " SMAP" : "", + ctrl.smep ? " SMEP" : "", + ctrl.nx ? " NX" : "", + ctrl.ac ? " AC" : "", + ctrl.wp ? " WP" : ""); + + pg_ctrl_t curr = get_current_pg_ctrl(); + if ( ctrl.raw != curr.raw ) + panic("Bad paging ctrl: Expected %#x, got %#x\n", + ctrl.raw, curr.raw); + + under_test.pte_printed = true; + } + + va_start(args, fmt); + vprintk(fmt, args); + va_end(args); + xtf_failure(NULL); + + static unsigned nr_failures; + if ( nr_failures >= 16 ) + { + printk("Excessive failures - giving up...\n"); + xtf_exit(); + } + nr_failures++; +} + +bool do_unhandled_exception(struct cpu_regs *regs) +{ + fail("ERROR: Unhandled exception during %s %s %s\n", + under_test.force ? "Emulated" : "Native", + under_test.user ? "User" : "Supervisor", + under_test.desc); + return false; +} + +static void prepare_virtual_mappings( + struct mapping_info *m, unsigned int level, bool super, paddr_t paddr) +{ + static intpte_t l1t[L1_PT_ENTRIES] __aligned(PAGE_SIZE); + static intpte_t l2t[L2_PT_ENTRIES] __aligned(PAGE_SIZE); + +#if CONFIG_PAGING_LEVELS > 3 + static intpte_t l3t[L3_PT_ENTRIES] __aligned(PAGE_SIZE); +#else + extern intpte_t l3t[]; +#endif + + bool pse36 = CONFIG_PAGING_LEVELS == 2 && paddr != (uint32_t)paddr; + + memset(m, 0, sizeof(*m)); + +#define PF_PGTBL PF_SYM(AD, U, RW, P) + if ( CONFIG_PAGING_LEVELS == 4 ) + { + + pae_l4_identmap[1] = pte_from_virt(l3t, PF_PGTBL); + pae_l4_identmap[2] = pte_from_virt(l3t, PF_PGTBL); + + l3t[0] = pte_from_virt(l2t, PF_PGTBL); + l3t[511] = pte_from_virt(l2t, PF_PGTBL); + + l2t[0] = pte_from_virt(l1t, PF_PGTBL); + l2t[511] = pte_from_virt(l1t, PF_PGTBL); + + l1t[0] = pte_from_paddr(paddr, PF_PGTBL); + l1t[511] = pte_from_paddr(paddr - 1, PF_PGTBL); + + m->va = _p(2ull << PAE_L4_PT_SHIFT); + m->l1e = &l1t[0]; + m->l2e = &l2t[0]; + m->l3e = &l3t[0]; + m->l4e = _p(&pae_l4_identmap[2]); + m->fe_pte = &l1t[511]; + + asm(_ASM_EXTABLE_HANDLER(2 << PAE_L4_PT_SHIFT, 0, ex_check_pf)); + } + else if ( CONFIG_PAGING_LEVELS == 3 ) + { + pae32_l3_identmap[1] = pte_from_virt(l2t, _PAGE_PRESENT); + pae32_l3_identmap[2] = pte_from_virt(l2t, _PAGE_PRESENT); + + l2t[0] = pte_from_virt(l1t, PF_PGTBL); + l2t[511] = pte_from_virt(l1t, PF_PGTBL); + + l1t[0] = pte_from_paddr(paddr, PF_PGTBL); + l1t[511] = pte_from_paddr(paddr - 1, PF_PGTBL); + + m->va = _p(2ull << PAE_L3_PT_SHIFT); + m->l1e = &l1t[0]; + m->l2e = &l2t[0]; + m->l3e = _p(&pae32_l3_identmap[2]); + m->l4e = NULL; + m->fe_pte = &l1t[511]; + + asm(_ASM_EXTABLE_HANDLER(2 << PAE_L3_PT_SHIFT, 0, ex_check_pf)); + } + else if ( CONFIG_PAGING_LEVELS == 2 ) + { + if ( pse36 ) + { + ASSERT(super); + ASSERT(IS_ALIGNED(paddr, MB(4))); + + pse_l2_identmap[511] = + fold_pse36((paddr - MB(4)) | PF_PGTBL | _PAGE_PSE); + pse_l2_identmap[512] = + fold_pse36(paddr | PF_PGTBL | _PAGE_PSE); + } + else + { + pse_l2_identmap[511] = pte_from_virt(l1t, PF_PGTBL); + pse_l2_identmap[512] = pte_from_virt(l1t, PF_PGTBL); + + l1t[0] = pte_from_paddr(paddr, PF_PGTBL); + l1t[1023] = pte_from_paddr(paddr - 1, PF_PGTBL); + } + + m->va = _p(2ull << PAE_L3_PT_SHIFT); + m->l1e = pse36 ? NULL : &l1t[0]; + m->l2e = _p(&pse_l2_identmap[512]); + m->l3e = NULL; + m->l4e = NULL; + m->fe_pte = pse36 ? 
_p(&pse_l2_identmap[511]) : &l1t[1023]; + + asm(_ASM_EXTABLE_HANDLER(2 << PAE_L3_PT_SHIFT, 0, ex_check_pf)); + } + else + panic("%s() PAGING_LEVELS %u not implemented yet\n", + __func__, CONFIG_PAGING_LEVELS); + +#undef PF_PGTBL + + /* Flush the TLB before trying to use the new mappings. */ + flush_tlb(false); + + m->level = level; + m->pte = m->ptes[level - 1]; + + if ( pse36 ) + { + /* No l1e at all. */ + m->order = PT_ORDER + PAGE_SHIFT; + m->paddr = *m->pte & ~0xfff; + m->leaf = true; + } + else if ( super && (read_cr4() & (X86_CR4_PAE|X86_CR4_PSE)) ) + { + /* Superpage in effect. */ + m->order = ((level - 1) * PT_ORDER) + PAGE_SHIFT; + m->paddr = *m->l1e & ~0xfff; + m->leaf = true; + } + else + { + /* Small page, or superpage not in effect. */ + m->order = 0; + m->paddr = *m->pte & ~0xfff; + m->leaf = (level == 1); + } +} + +static void check(struct mapping_info *m, exinfo_t actual, exinfo_t expected, + unsigned int walk) +{ + /* Check that the actual pagefault matched our expectation. */ + if ( actual != expected ) + fail(" Fail: expected %pe, got %pe for %s %s %s\n", + _p(expected), _p(actual), + under_test.force ? "Emulated" : "Native", + under_test.user ? "User" : "Supervisor", + under_test.desc); + + /* Check that A/D bits got updated as expected. */ + unsigned int leaf_level = + m->order ? ((m->order - PAGE_SHIFT) / PT_ORDER) : 0; + unsigned int i; /* NB - Levels are 0-indexed. */ + + if ( vendor_is_amd ) + { + /* + * On AMD hardware, updates to A/D bits are not ordered with respect + * to ordinary memory accesses. They are ordered by atomic + * operations, serialising events, and mfence. mfence is by far the + * lowest overhead option when running on shadow pagetables, due to + * not causing a vmexit. + */ + mb(); + } + + for ( i = 0; i < ARRAY_SIZE(m->ptes); ++i ) + { + int exp_a, exp_d; + + if ( !m->ptes[i] ) + continue; + + if ( CONFIG_PAGING_LEVELS == 3 && i == 2 ) + { + /* + * 32bit PAE paging is special. The 4 PDPTEs are read from + * memory, cached in the processor, and don't strictly count as + * pagetables. The A/D bits are not updated. + */ + exp_a = 0; + exp_d = 0; + } + else if ( leaf_level > i ) + { + /* + * Logically below a superpage. Nothing should consider this part + * of the pagetable structure, and neither A nor D should be set. + */ + exp_a = 0; + exp_d = 0; + } + else if ( leaf_level == i ) + { + /* + * At a leaf page. If there was no fault, we expect A to be set, + * optionally D if a write occurred. + */ + exp_a = (actual == 0); + exp_d = exp_a && (walk & PFEC_SYM(W)); + } + else + { + /* + * Higher level translation structure. A processor is free to + * cache the partial translation or not, at its discretion, but + * they will never be dirty. + */ + exp_a = -1; + exp_d = 0; + } + + bool act_a = *m->ptes[i] & _PAGE_ACCESSED; + bool act_d = *m->ptes[i] & _PAGE_DIRTY; + + if ( (exp_a >= 0 && exp_a != act_a) || (exp_d != act_d) ) + fail(" Fail: expected L%u AD = %c%u, got %u%u for %s %s\n", + i + 1, exp_a == 1 ? '1' : exp_a == 0 ? '0' : 'x', exp_d, + act_a, act_d, + under_test.user ? "User" : "Supervisor", under_test.desc); + } + + clear_ad(m); + write_fs(0); +} + +static exinfo_t calc_fault(struct mapping_info *m, uint64_t pte, + unsigned int walk, pg_ctrl_t ctrl) + +{ + bool nx_valid = CONFIG_PAGING_LEVELS >= 3 && ctrl.nx; + uint64_t rsvd = pa_rsvd; + + /* Accumulate additional bits which are reserved. */ + if ( !nx_valid ) + rsvd |= _PAGE_NX; + + if ( m->level == 4 ) + rsvd |= _PAGE_PSE | (vendor_is_amd ?
_PAGE_GLOBAL : 0); + else if ( m->level == 3 && !cpu_has_page1gb ) + rsvd |= _PAGE_PSE; + + if ( m->order ) + { + if ( CONFIG_PAGING_LEVELS > 2 || !cpu_has_pse36 ) + rsvd |= ((1ull << m->order) - 1) & ~(_PAGE_PSE_PAT | (_PAGE_PSE_PAT - 1)); + else + rsvd |= (1ull << 21) | fold_pse36(rsvd); + } + + if ( CONFIG_PAGING_LEVELS == 3 ) + rsvd |= 0x7ff0000000000000ull; + + /* + * A pagefault always reports the User and Writeable input from the + * pagewalk. + */ + exinfo_t fault = EXINFO_SYM(PF, walk & PFEC_SYM(U, W)); + + /* + * Instruction fetch (vs data access) is only reported when NX or SMEP are + * active. + */ + if ( nx_valid || ctrl.smep ) + fault |= (walk & PFEC_SYM(I)); + + /* Check whether a translation exists. */ + if ( !(pte & _PAGE_PRESENT) ) + return fault; + + /* When a translation exists, Present is always reported. */ + fault |= PFEC_SYM(P); + + /* Check whether any reserved bits are set. */ + if ( pte & rsvd ) + return fault | PFEC_SYM(R); + + /* Check access rights. */ + + if ( walk & PFEC_SYM(I) ) /* Instruction fetch. */ + { + if ( pte & _PAGE_NX ) + return fault; /* Fetch from NX page. */ + + if ( walk & PFEC_SYM(U) ) /* User fetch. */ + { + if ( !(pte & _PAGE_USER) ) + return fault; /* User fetch from supervisor page. */ + } + else /* Supervisor fetch. */ + { + if ( (pte & _PAGE_USER) && ctrl.smep ) + return fault; /* Supervisor fetch from user page with SMEP. */ + } + } + else /* Data access */ + { + if ( CONFIG_PAGING_LEVELS == 4 && /* Check Protection Key */ + m->leaf && (((pte >> 59) & 0xf) == 0xf) ) + { + if ( pte & PFEC_SYM(U) ) /* User mapping */ + { + if ( ctrl.pke && + (ctrl.pkru_ad || + ((walk & PFEC_SYM(W)) && ctrl.pkru_wd && + ((walk & PFEC_SYM(U)) || ctrl.wp))) ) + return fault | PFEC_SYM(K); + } + else /* Supervisor mapping */ + { + if ( ctrl.pks && + (ctrl.pkrs_ad || + ((walk & PFEC_SYM(W)) && ctrl.pkrs_wd && + ((walk & PFEC_SYM(U)) || ctrl.wp))) ) + return fault | PFEC_SYM(K); + } + } + + if ( walk & PFEC_SYM(U) ) /* User access. */ + { + if ( !(pte & _PAGE_USER) ) + return fault; /* User access from supervisor page. */ + + if ( (walk & PFEC_SYM(W)) && !(pte & _PAGE_RW) ) + return fault; /* Write to a read-only page. */ + } + else /* Supervisor access. */ + { + if ( (walk & PFEC_SYM(W)) && !(pte & _PAGE_RW) && ctrl.wp ) + return fault; /* Write to a read-only page with WP. */ + + if ( (pte & _PAGE_USER) && ctrl.smap && + ((walk & PFEC_SYM(IMP)) || !ctrl.ac) ) + return fault; /* Supervisor access from user page with SMAP and + * (Implicit or !AC)? */ + } + } + + /* Should succeed. */ + return 0; +} + +static void test_pte(struct mapping_info *m, uint64_t overlay, pg_ctrl_t ctrl) +{ + uint64_t new = m->paddr | overlay; + bool user; + const struct stubs *stubs; + + under_test = (struct test_printinfo){ + .pteval = new, + .active = true, + .va = _u(m->va), + .ctrl = ctrl, + }; + + *m->pte = new; + clear_ad(m); + + for ( user = false; ; user = true ) + { + unsigned int walk, base = user ? PFEC_SYM(U) : 0; + + under_test.user = user; + +#define CALL(fn, va) \ + (user ? exec_user_param(fn ## _user, _u(va)) : fn(_u(va))) + + for ( stubs = &native_stubs; ; stubs = &emul_stubs ) + { + under_test.force = stubs == &emul_stubs; + + /* Map the exec FEP stub with suitable permissions. */ + if ( under_test.force ) + { + if ( user ) + *m->fe_pte |= _PAGE_USER; + else + *m->fe_pte &= ~_PAGE_USER; + invlpg(m->va - 5); + } + + /* Basic read. 
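A single-byte data read: the walk sets neither the W nor the I bit, so only U (from the user/supervisor stub flavour) and the active control settings decide the verdict.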
*/ + under_test.desc = "Read"; + walk = base | PFEC_SYM(); + check(m, CALL(stubs->read, m->va), + calc_fault(m, new, walk, ctrl), walk); + + /* Implicit read (always supervisor). `mov $LDT_SEL, %fs`. */ + if ( user || !shadow_paging || !(ctrl.smap && ctrl.ac) ) + { + under_test.desc = "Implicit Read"; + walk = PFEC_SYM(IMP); + check(m, CALL(stubs->implicit, LDT_SEL), + calc_fault(m, new, walk, ctrl), walk); + } + else + nr_tests--; /* Adjust later += 4 for skipping this test. */ + + /* Basic write. */ + under_test.desc = "Write"; + walk = base | PFEC_SYM(W); + check(m, CALL(stubs->write, m->va), + calc_fault(m, new, walk, ctrl), walk); + + /* Basic exec. */ + under_test.desc = "Exec"; + walk = base | PFEC_SYM(I); + check(m, CALL(stubs->exec, m->va), + calc_fault(m, new, walk, ctrl), walk); + + nr_tests += 4; + + if ( !xtf_has_fep || stubs == &emul_stubs ) + break; + } + + if ( user ) + break; + } + +#undef CALL + + under_test.active = false; +} + +static void shatter_console_superpage(void) +{ + /* + * Shatter the superpage mapping the PV console. We want to test with + * CR4.PSE disabled, at which point superpages stop working. + */ + uint64_t raw_pfn; + + if ( hvm_get_param(HVM_PARAM_CONSOLE_PFN, &raw_pfn) == 0 ) + { + unsigned int l2o = l2_table_offset(raw_pfn << PAGE_SHIFT); + + if ( (l2_identmap[l2o] & PF_SYM(PSE, P)) == PF_SYM(PSE, P) ) + { + static intpte_t conl1t[L1_PT_ENTRIES] __aligned(PAGE_SIZE); + paddr_t base_gfn = l2_identmap[l2o] >> PAGE_SHIFT; + unsigned int i; + + for ( i = 0; i < ARRAY_SIZE(conl1t); ++i ) + conl1t[i] = pte_from_gfn(base_gfn + i, PF_SYM(AD, RW, P)); + + l2_identmap[l2o] = pte_from_virt(conl1t, PF_SYM(AD, U, RW, P)); + } + } + + flush_tlb(true); +} + +static void foreach_pte_bit_combination(unsigned int level, bool super, + const pg_ctrl_t ctrl) +{ + unsigned long cr4 = read_cr4(); + const uint64_t base = super ? _PAGE_PSE : 0; + struct mapping_info m; + unsigned int i; + + for ( i = 0; i < ARRAY_SIZE(paddrs); ++i ) + { + paddr_t paddr = paddrs[i]; + + /* Skip unencodeable combinations. */ + if ( CONFIG_PAGING_LEVELS == 2 && paddr != (uint32_t)paddr && + (level == 1 || !cpu_has_pse36 || !(cr4 & X86_CR4_PSE) || !super) ) + continue; + + struct + { + bool cond; + uint64_t bit; + } trans_bits[] = { + { true, 0 }, + { true, _PAGE_GLOBAL }, + +#if CONFIG_PAGING_LEVELS == 2 + + { super && (cr4 & X86_CR4_PSE) && !cpu_has_pse36, 1ull << 13 }, + { super && (cr4 & X86_CR4_PSE) && !cpu_has_pse36, 1ull << 21 }, + + { super && paddr != (uint32_t)paddr, 1ull << 21 }, + + { super && paddr != (uint32_t)paddr && maxphysaddr < 39, + fold_pse36(1ull << 39) }, + { super && paddr != (uint32_t)paddr && maxphysaddr < 38, + fold_pse36(1ull << maxphysaddr) }, + +#else + + { super, 1ull << 13 }, + { super, PAGE_SIZE << (((level - 1) * PT_ORDER) - 1) }, + + { maxphysaddr < 50, 1ull << maxphysaddr }, + { maxphysaddr < 51, 1ull << 51 }, + { true, 1ull << 52 }, + { true, 0xfull << 59 }, /* PKEY 15 */ + { true, _PAGE_NX }, + +#endif + }; + + prepare_virtual_mappings(&m, level, super, paddr); + + /* Basic access rights are the bottom 3 bits in any pagetable entry. 
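Iterating `ar` over 0..7 therefore covers every combination of Present, Writeable and User, beneath each applicable trans_bits overlay.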
*/ + for ( unsigned int ar = 0; ar <= PF_SYM(U, RW, P); ++ar ) + { + for ( unsigned idx = 0; idx < ARRAY_SIZE(trans_bits); ++idx ) + { + if ( !trans_bits[idx].cond ) + continue; + + test_pte(&m, base | trans_bits[idx].bit | ar, ctrl); + } + } + } +} + +static void foreach_pg_ctrl_setting(unsigned int level, bool super) +{ + unsigned long ctrl, max = (pg_ctrl_t){ .max = true, }.raw; + pg_ctrl_t missing = {}; + + missing.nx = !cpu_has_nx; + missing.smep = !cpu_has_smep; + missing.smap = !cpu_has_smap; + missing.pke = missing.pkru_wd = missing.pkru_ad = !cpu_has_pku; + missing.pks = missing.pkrs_wd = missing.pkrs_ad = !cpu_has_pks; + + uint64_t efer = rdmsr(MSR_EFER); + uint32_t pkrs = cpu_has_pks && rdmsr(MSR_PKRS); + unsigned long cr0 = read_cr0(); + unsigned long cr4 = read_cr4(); + + for ( ctrl = 0; ctrl < max; ++ctrl ) + { + pg_ctrl_t c = { ctrl }; + + if ( ctrl & missing.raw ) + { + ctrl += ((ctrl & missing.raw) - 1); + continue; + } + + uint64_t nefer = (efer & ~EFER_NXE) | (c.nx ? EFER_NXE : 0); + if ( nefer != efer ) + wrmsr(MSR_EFER, efer = nefer); + + unsigned long ncr0 = (cr0 & ~X86_CR0_WP) | (c.wp ? X86_CR0_WP : 0); + if ( ncr0 != cr0 ) + write_cr0(cr0 = ncr0); + + unsigned long ncr4 = + ((cr4 & ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_PKS)) | + (c.smep ? X86_CR4_SMEP : 0) | + (c.smap ? X86_CR4_SMAP : 0) | + (cpu_has_pku ? X86_CR4_PKE : 0) | /* Always set PKE to edit PKRU */ + (c.pks ? X86_CR4_PKS : 0)); + if ( ncr4 != cr4 ) + write_cr4(cr4 = ncr4); + + if ( cpu_has_smap ) + { + if ( c.ac ) + asm volatile ("stac"); + else + asm volatile ("clac"); + } + else + asm volatile ("pushf;" + "andl $~" STR(X86_EFLAGS_AC) ", (%%" _ASM_SP ");" + "orl %[ac], (%%" _ASM_SP ");" + "popf" + :: [ac] "r" (c.ac ? X86_EFLAGS_AC : 0)); + + if ( cr4 & X86_CR4_PKE ) + { + wrpkru((c.pkru_wd ? PKR_WD_VAL : 0) | + (c.pkru_ad ? PKR_AD_VAL : 0)); + if ( !c.pke ) + write_cr4(cr4 = (cr4 & ~X86_CR4_PKE)); + } + + uint32_t npkrs = ((c.pkrs_wd ? PKR_WD_VAL : 0) | + (c.pkrs_ad ? PKR_AD_VAL : 0)); + if ( npkrs != pkrs ) + wrmsr(MSR_PKRS, pkrs = npkrs); + + if ( CONFIG_PAGING_LEVELS == 0 ) + ; + else + foreach_pte_bit_combination(level, super, c); + } + + /* Reset environment. */ + wrmsr(MSR_EFER, efer & ~EFER_NXE); + write_cr0(cr0 & ~X86_CR0_WP); + if ( cpu_has_pku ) + { + if ( !(cr4 & X86_CR4_PKE) ) + write_cr4(cr4 | X86_CR4_PKE); + wrpkru(0); + } + write_cr4(cr4 & ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_PKS)); + if ( cpu_has_pks ) + wrmsr(MSR_PKRS, 0); +} + +static void test_pagetable_level(unsigned int level, bool super) +{ + printk("Test L%ue%s%s\n", + level, super ? " Superpage" : "", + CONFIG_PAGING_LEVELS == 2 && + !(read_cr4() & X86_CR4_PSE) ? " (No PSE)" : ""); + + foreach_pg_ctrl_setting(level, super); +} + +static void foreach_pagetable_level(void) +{ + test_pagetable_level(1, false); + + if ( CONFIG_PAGING_LEVELS == 2 ) + { + unsigned long cr4 = read_cr4(); + + write_cr4(cr4 & ~X86_CR4_PSE); + + test_pagetable_level(2, false); + test_pagetable_level(2, true); + + write_cr4(cr4 | X86_CR4_PSE); + } + + test_pagetable_level(2, false); + test_pagetable_level(2, true); + + if ( CONFIG_PAGING_LEVELS > 3 ) + { + test_pagetable_level(3, false); + test_pagetable_level(3, true); + test_pagetable_level(4, false); + test_pagetable_level(4, true); + } +} + +/** + * Calculate suitable physical addresses to use, and ensure there is ram + * behind them. 
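(As a worked example: under 4-level paging with a maxphysaddr of 46, the pair works out as 1GiB and 1ull << 45.)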
The low paddr needs sufficient alignment to test an L3 + * superpage if necessary, while the high paddr is chosen so the maximum + * usable physical address bit is set. + */ +static void prepare_physical_addresses(void) +{ + paddrs[0] = GB(1); + paddrs[1] = 1ull << (min(maxphysaddr, + CONFIG_PAGING_LEVELS == 2 + ? 40U + : (BITS_PER_LONG + PAGE_SHIFT)) - 1); + + unsigned long extents[] = { + (paddrs[0] >> PAGE_SHIFT) - 1, paddrs[0] >> PAGE_SHIFT, + (paddrs[1] >> PAGE_SHIFT) - 1, paddrs[1] >> PAGE_SHIFT, + }; + struct xen_memory_reservation mr = { + .extent_start = extents, + .nr_extents = ARRAY_SIZE(extents), + .domid = DOMID_SELF, + }; + int rc = hypercall_memory_op(XENMEM_populate_physmap, &mr); + + if ( rc != ARRAY_SIZE(extents) ) + return xtf_error("Failed populate_physmap: %d\n", rc); + + unsigned int i; + for ( i = 0; i < ARRAY_SIZE(paddrs); ++i ) + { + struct mapping_info m; + paddr_t paddr = paddrs[i]; + + if ( CONFIG_PAGING_LEVELS == 2 && paddr != (uint32_t)paddr ) + { + if ( cpu_has_pse36 ) + prepare_virtual_mappings(&m, 2, true, paddr); + else + { + printk("Unable to prepare paddr %"PRIpaddr"\n", paddr); + continue; + } + } + else + prepare_virtual_mappings(&m, 1, false, paddr); + + /* Put FEP immediately before va, and a ret instruction at va. */ + memcpy(m.va - 5, "\x0f\x0bxen\xc3", 6); + barrier(); + + /* Read them back, to confirm that RAM is properly in place. */ + if ( memcmp(m.va - 5, "\x0f\x0bxen\xc3", 6) ) + panic("Bad phys or virtual setup\n"); + + /* Construct the LDT at va. */ + user_desc *ldt = m.va; + + /* Bottom byte of the LDT must be a `ret`, as it is also executed. */ + ldt[LDT_SEL >> 3] = GDTE_SYM(0, 0xfffc3, COMMON, DATA, DPL3, B, W); + pack_ldt_desc(&gdt[GDTE_AVAIL0], ldt, 7); + + lldt(GDTE_AVAIL0 << 3); + + /* Check that we can successfully load the LDT selector. */ + write_fs(LDT_SEL); + } +} + +static void probe_paging_mode(void) +{ + /* + * Shadow paging vs HAP should be indistinguishable to guests. + * + * Shadow paging doesn't support PSE36, so this feature is nominally + * hidden from guests. + * + * Luckily(?), because old versions of HyperV refuse to boot if they don't + * see PSE36, it is purposefully leaked once PAE is enabled to keep HyperV + * happy. + * + * As a result, our shadow paging heuristic is that the visibility of the + * PSE36 feature changes as we flip in and out of PAE paging mode. + */ + unsigned int _1d; + unsigned long cr0 = read_cr0(), cr4 = read_cr4(); + + switch ( CONFIG_PAGING_LEVELS ) + { + case 0: + write_cr4(cr4 | X86_CR4_PAE); + write_cr3(_u(pae32_l3_identmap)); + write_cr0(cr0 | X86_CR0_PG); + + _1d = cpuid_edx(1); + + write_cr0(cr0); + write_cr4(cr4); + break; + + case 2: + write_cr0(cr0 & ~X86_CR0_PG); + + write_cr3(_u(pae32_l3_identmap)); + + write_cr4(cr4 | X86_CR4_PAE); + write_cr0(cr0); + + _1d = cpuid_edx(1); + + write_cr0(cr0 & ~X86_CR0_PG); + write_cr4(cr4 & ~X86_CR4_PAE); + + write_cr3(_u(cr3_target)); + + write_cr0(cr0); + break; + + case 3: + write_cr0(cr0 & ~X86_CR0_PG); + write_cr4(cr4 & ~X86_CR4_PAE); + + write_cr3(_u(pse_l2_identmap)); + + write_cr0(cr0); + + _1d = cpuid_edx(1); + + write_cr0(cr0 & ~X86_CR0_PG); + + write_cr3(_u(cr3_target)); + + write_cr4(cr4); + write_cr0(cr0); + break; + + case 4: + /* Easier to write in ASM, as it involves dropping to 32bit mode. */ + _1d = stub_64bit_probe_paging_mode(); + break; + } + + shadow_paging = cpu_has_pse36 ^ !!(_1d & cpufeat_mask(X86_FEATURE_PSE36)); + + printk(" Paging mode heuristic: %s\n", shadow_paging ? "Shadow" : "Hap"); +} + +void test_main(void) +{ + if ( !vendor_is_intel && !vendor_is_amd ) + xtf_warning("Unknown CPU vendor. Something might go wrong\n"); + + printk(" Info: %s, Fam %u, Model %u, Stepping %u, paddr %u, vaddr %u\n" + " Features:%s%s%s%s%s%s%s%s%s%s%s%s\n", + x86_vendor_name(x86_vendor), + x86_family, x86_model, x86_stepping, maxphysaddr, maxvirtaddr, + cpu_has_pse ? " PSE" : "", + cpu_has_pae ? " PAE" : "", + cpu_has_pge ? " PGE" : "", + cpu_has_pat ? " PAT" : "", + cpu_has_pse36 ? " PSE36" : "", + cpu_has_pcid ? " PCID" : "", + cpu_has_nx ? " NX" : "", + cpu_has_page1gb ? " PAGE1G" : "", + cpu_has_smep ? " SMEP" : "", + cpu_has_smap ? " SMAP" : "", + cpu_has_pku ? " PKU" : "", + cpu_has_pks ? " PKS" : ""); + + pa_rsvd = ((1ull << 52) - 1) & ~((1ull << maxphysaddr) - 1); + + if ( !xtf_has_fep ) + xtf_skip("FEP support not detected - some tests will be skipped\n"); + + probe_paging_mode(); + + if ( CONFIG_PAGING_LEVELS == 0 ) + { + printk("Test control settings in unpaged environment\n"); + + foreach_pg_ctrl_setting(0, false); + + return xtf_success(NULL); + } + + if ( CONFIG_PAGING_LEVELS == 2 ) + shatter_console_superpage(); + + /* Sanitise environment. */ + wrmsr(MSR_EFER, rdmsr(MSR_EFER) & ~EFER_NXE); + write_cr0(read_cr0() & ~X86_CR0_WP); + unsigned long cr4 = read_cr4(); + if ( cpu_has_pku ) + { + write_cr4(cr4 | X86_CR4_PKE); + wrpkru(0); + } + write_cr4(cr4 & ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_PKS)); + if ( cpu_has_pks ) + wrmsr(MSR_PKRS, 0); + + prepare_physical_addresses(); + + printk(" Using physical addresses %"PRIpaddr" and %"PRIpaddr"\n", + paddrs[0], paddrs[1]); + + foreach_pagetable_level(); + + printk("Completed %lu tests\n", nr_tests); + xtf_success(NULL); +} + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/tests/pagetable-emulation/stubs.S b/tests/pagetable-emulation/stubs.S new file mode 100644 index 0000000..112cfd0 --- /dev/null +++ b/tests/pagetable-emulation/stubs.S @@ -0,0 +1,174 @@ +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifdef __i386__ +# define REG edx +# define REG8 dl +#else +# define REG rdi +# define REG8 dil +#endif + +.macro GEN_SINGLE name, type, force=0 + +ENTRY(stub_\name) /* exinfo_t stub_$name(unsigned long param) */ + + /* Default to no fault. Filled by ex_check_pf() in exception context. */ + xor %eax, %eax + +#ifdef __i386__ + /* Grab 'param' off the stack. */ + mov 4(%esp), %REG +#endif + + /* Insert FEP for the non-exec tests. */ + .ifnc \type, X + .if \force + _ASM_XEN_FEP + .endif + .endif + + /* Faulting address for the non-exec tests. */ +.Lstub_\name\()_fault: + + .ifc \type, R + /* Read (and trash) the pointer. */ + mov (%REG), %REG8 + .endif + + .ifc \type, I + /* Load %fs with the selector in %REG. Causes implicit GDT/LDT access. */ + mov %REG, %fs + .endif + + .ifc \type, W + /* Write `ret` to the pointer. */ + movb $0xc3, (%REG) + .endif + + .ifc \type, X + /* Call the pointer. */ + + .if \force + /* If force, adjust the function pointer to include the force emulation prefix. */ + sub $5, %REG + .endif + + call *%REG + .endif + + /* Fixup address for faults.
*/ +.Lstub_\name\()_fixup: + ret + +ENDFUNC(stub_\name) + + .ifnc \type, X + _ASM_EXTABLE_HANDLER(.Lstub_\name\()_fault, + .Lstub_\name\()_fixup, + ex_check_pf) + .endif +.endm + +GEN_SINGLE read R +GEN_SINGLE implicit I +GEN_SINGLE write W +GEN_SINGLE exec X +GEN_SINGLE force_read R 1 +GEN_SINGLE force_implicit I 1 +GEN_SINGLE force_write W 1 +GEN_SINGLE force_exec X 1 + +.pushsection .text.user, "ax", @progbits + +GEN_SINGLE read_user R +GEN_SINGLE implicit_user I +GEN_SINGLE write_user W +GEN_SINGLE exec_user X +GEN_SINGLE force_read_user R 1 +GEN_SINGLE force_implicit_user I 1 +GEN_SINGLE force_write_user W 1 +GEN_SINGLE force_exec_user X 1 + +.popsection + +#ifdef __x86_64__ +ENTRY(stub_64bit_probe_paging_mode) + push %rbx + + push $(GDTE_CS32_DPL0 * 8) + push $start_32bit + lretq + + .code32 +start_32bit: + + /* Disable paging. */ + mov %cr0, %esi + mov %esi, %eax + and $~X86_CR0_PG, %eax + mov %eax, %cr0 + + /* Flip CR4.PAE and PSE. */ + mov %cr4, %edi + mov %edi, %eax + and $~X86_CR4_PAE, %eax + or $ X86_CR4_PSE, %eax + mov %eax, %cr4 + + /* Pick %cr3 for 2-level paging. */ + mov $pse_l2_identmap, %eax + mov %eax, %cr3 + + /* Disable long mode. */ + mov $MSR_EFER, %ecx + rdmsr + and $~EFER_LME, %eax + wrmsr + + /* Re-enable paging. */ + mov %esi, %cr0 + + /* Query CPUID.1 and stash %edx for the return value. */ + mov $1, %eax + cpuid + mov %edx, %ebx + + /* Disable paging. */ + mov %esi, %eax + and $~X86_CR0_PG, %eax + mov %eax, %cr0 + + /* Enable long mode. */ + mov $MSR_EFER, %ecx + rdmsr + or $EFER_LME, %eax + wrmsr + + /* Restore CR4.PAE. */ + mov %edi, %cr4 + + /* Pick %cr3 for 4-level paging. */ + mov $pae_l4_identmap, %eax + mov %eax, %cr3 + + /* Re-enable paging. */ + mov %esi, %cr0 + + ljmp $__KERN_CS, $stop_32bit +stop_32bit: + .code64 + + mov %ebx, %eax + + pop %rbx + ret +ENDFUNC(stub_64bit_probe_paging_mode) +#endif /* __x86_64__ */ diff --git a/tests/pagetable-emulation/stubs.h b/tests/pagetable-emulation/stubs.h new file mode 100644 index 0000000..c0582d3 --- /dev/null +++ b/tests/pagetable-emulation/stubs.h @@ -0,0 +1,27 @@ +#ifndef _STUBS_H_ +#define _STUBS_H_ + +unsigned long stub_read(unsigned long va); +unsigned long stub_implicit(unsigned long sel); +unsigned long stub_write(unsigned long va); +unsigned long stub_exec(unsigned long va); + +unsigned long stub_force_read(unsigned long va); +unsigned long stub_force_implicit(unsigned long sel); +unsigned long stub_force_write(unsigned long va); +unsigned long stub_force_exec(unsigned long va); + +unsigned long stub_read_user(unsigned long va); +unsigned long stub_implicit_user(unsigned long sel); +unsigned long stub_write_user(unsigned long va); +unsigned long stub_exec_user(unsigned long va); + +unsigned long stub_force_read_user(unsigned long va); +unsigned long stub_force_implicit_user(unsigned long sel); +unsigned long stub_force_write_user(unsigned long va); +unsigned long stub_force_exec_user(unsigned long va); + +/* Returns CPUID.1.D */ +uint32_t stub_64bit_probe_paging_mode(void); + +#endif
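As a stand-alone cross-check of the basic access-rights subset of calc_fault() above (ignoring NX, SMEP, SMAP and protection keys), the architectural @#PF error code bits involved are P (bit 0), W (bit 1) and U (bit 2). The following is a minimal host-runnable model; expect_fault() and its layout are illustrative only, not code from the test:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PFEC_P (1u << 0) /* A translation was present. */
    #define PFEC_W (1u << 1) /* The access was a write. */
    #define PFEC_U (1u << 2) /* The access came from user mode. */

    /* Expected error code, or 0 for success, for the basic-rights subset. */
    static uint32_t expect_fault(bool present, bool rw, bool user_page,
                                 uint32_t walk, bool wp)
    {
        /* The U and W inputs to the walk are always reported. */
        uint32_t fault = walk & (PFEC_U | PFEC_W);

        if ( !present )
            return fault;          /* Not present: terminal. */

        fault |= PFEC_P;           /* A translation exists. */

        if ( walk & PFEC_U )       /* User access. */
        {
            if ( !user_page )
                return fault;      /* User access to a supervisor page. */
            if ( (walk & PFEC_W) && !rw )
                return fault;      /* Write to a read-only page. */
        }
        else                       /* Supervisor access. */
        {
            if ( (walk & PFEC_W) && !rw && wp )
                return fault;      /* Write to a read-only page with WP. */
        }

        return 0;                  /* The access should succeed. */
    }

    int main(void)
    {
        /* User write to a present, read-only, user page: 0x7. */
        printf("%#x\n", expect_fault(true, false, true, PFEC_U | PFEC_W, true));

        /* Supervisor write to the same page with CR0.WP set: 0x3. */
        printf("%#x\n", expect_fault(true, false, true, PFEC_W, true));

        /* Supervisor write with CR0.WP clear: 0 (success). */
        printf("%#x\n", expect_fault(true, false, true, PFEC_W, false));

        return 0;
    }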