ia64/xen-unstable

changeset 17406:9b635405ef90

x86, vmx: Enable EPT (Extended Page Tables) support on new Intel processors.

We use the EPT page table as the P2M (guest-physical to machine
mapping), removing the linear page table when EPT is used for the
domain (see the new file p2m-ept.c). This is done by adding three
operations (set_entry, get_entry, get_entry_current) to struct
p2m_domain. If VT-d is enabled, the EPT page table is also used as
the VT-d page table (i.e. the two are shared).
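
For clarity, a minimal sketch of the new indirection. The exact
struct p2m_domain layout lives in asm-x86/p2m.h, whose hunk is not
reproduced in this excerpt, so the field declarations below are
inferred from the hook signatures in the new p2m-ept.c; only
ept_p2m_init() is shown verbatim from that file:

    /* The P2M is driven through three function pointers, which the
     * EPT code installs when hardware-assisted paging is enabled. */
    struct p2m_domain {
        /* ... lock, page list, alloc/free hooks, max_mapped_pfn ... */
        int   (*set_entry)(struct domain *d, unsigned long gfn,
                           mfn_t mfn, p2m_type_t p2mt);
        mfn_t (*get_entry)(struct domain *d, unsigned long gfn,
                           p2m_type_t *t);
        mfn_t (*get_entry_current)(unsigned long gfn, p2m_type_t *t);
    };

    /* Override the default (page-table based) P2M hooks with the
     * EPT implementations; called from p2m_init() on Intel/HAP. */
    void ept_p2m_init(struct domain *d)
    {
        d->arch.p2m->set_entry         = ept_set_entry;
        d->arch.p2m->get_entry         = ept_get_entry;
        d->arch.p2m->get_entry_current = ept_get_entry_current;
    }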

Signed-off-by: Xin Li <xin.b.li@intel.com>
Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Xiaohui Xin <Xiaohui.xin@intel.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Apr 09 11:30:32 2008 +0100 (2008-04-09)
parents e1962ac0fb1c
children 32e3c81ada56
files tools/libxc/xc_hvm_build.c xen/arch/x86/domain.c xen/arch/x86/hvm/hvm.c xen/arch/x86/hvm/vmx/vmcs.c xen/arch/x86/hvm/vmx/vmx.c xen/arch/x86/mm.c xen/arch/x86/mm/hap/Makefile xen/arch/x86/mm/hap/p2m-ept.c xen/arch/x86/mm/p2m.c xen/arch/x86/mm/paging.c xen/common/domctl.c xen/drivers/passthrough/vtd/iommu.c xen/include/asm-x86/domain.h xen/include/asm-x86/hvm/domain.h xen/include/asm-x86/hvm/svm/vmcb.h xen/include/asm-x86/hvm/vmx/vmcs.h xen/include/asm-x86/hvm/vmx/vmx.h xen/include/asm-x86/p2m.h xen/include/asm-x86/paging.h xen/include/public/hvm/params.h xen/include/xen/hypercall.h
line diff
     1.1 --- a/tools/libxc/xc_hvm_build.c	Tue Apr 08 11:41:27 2008 +0100
     1.2 +++ b/tools/libxc/xc_hvm_build.c	Wed Apr 09 11:30:32 2008 +0100
     1.3 @@ -21,6 +21,13 @@
     1.4  
     1.5  #define SCRATCH_PFN 0xFFFFF
     1.6  
     1.7 +#define SPECIALPAGE_GUARD    0
     1.8 +#define SPECIALPAGE_BUFIOREQ 1
     1.9 +#define SPECIALPAGE_XENSTORE 2
    1.10 +#define SPECIALPAGE_IOREQ    3
    1.11 +#define SPECIALPAGE_IDENT_PT 4
    1.12 +#define NR_SPECIAL_PAGES     5
    1.13 +
    1.14  static void build_e820map(void *e820_page, unsigned long long mem_size)
    1.15  {
    1.16      struct e820entry *e820entry =
    1.17 @@ -77,21 +84,16 @@ static void build_e820map(void *e820_pag
    1.18      e820entry[nr_map].type = E820_RESERVED;
    1.19      nr_map++;
    1.20  
    1.21 -    /*
    1.22 -     * Low RAM goes here. Remove 4 pages for: ioreq, bufioreq, and xenstore.
    1.23 -     *  1. Guard page.
    1.24 -     *  2. Buffered ioreq.
    1.25 -     *  3. Xenstore.
    1.26 -     *  4. Normal ioreq.
    1.27 -     */
    1.28 +    /* Low RAM goes here. Reserve space for special pages. */
    1.29      e820entry[nr_map].addr = 0x100000;
    1.30 -    e820entry[nr_map].size = mem_size - 0x100000 - PAGE_SIZE * 4;
    1.31 +    e820entry[nr_map].size = (mem_size - 0x100000 -
    1.32 +                              PAGE_SIZE * NR_SPECIAL_PAGES);
    1.33      e820entry[nr_map].type = E820_RAM;
    1.34      nr_map++;
    1.35  
    1.36 -    /* Explicitly reserve space for special pages. */
    1.37 -    e820entry[nr_map].addr = mem_size - PAGE_SIZE * 3;
    1.38 -    e820entry[nr_map].size = PAGE_SIZE * 3;
    1.39 +    /* Explicitly reserve space for special pages (excluding guard page). */
    1.40 +    e820entry[nr_map].addr = mem_size - PAGE_SIZE * (NR_SPECIAL_PAGES - 1);
    1.41 +    e820entry[nr_map].size = PAGE_SIZE * (NR_SPECIAL_PAGES - 1);
    1.42      e820entry[nr_map].type = E820_RESERVED;
    1.43      nr_map++;
    1.44  
    1.45 @@ -156,10 +158,11 @@ static int setup_guest(int xc_handle,
    1.46  {
    1.47      xen_pfn_t *page_array = NULL;
    1.48      unsigned long i, nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT);
    1.49 -    unsigned long shared_page_nr, entry_eip;
    1.50 +    unsigned long special_page_nr, entry_eip;
    1.51      struct xen_add_to_physmap xatp;
    1.52      struct shared_info *shared_info;
    1.53      void *e820_page;
    1.54 +    uint32_t *ident_pt;
    1.55      struct elf_binary elf;
    1.56      uint64_t v_start, v_end;
    1.57      int rc;
    1.58 @@ -245,29 +248,46 @@ static int setup_guest(int xc_handle,
    1.59             sizeof(shared_info->evtchn_mask));
    1.60      munmap(shared_info, PAGE_SIZE);
    1.61  
    1.62 -    if ( v_end > HVM_BELOW_4G_RAM_END )
    1.63 -        shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1;
    1.64 -    else
    1.65 -        shared_page_nr = (v_end >> PAGE_SHIFT) - 1;
    1.66 +    special_page_nr = (((v_end > HVM_BELOW_4G_RAM_END)
    1.67 +                        ? (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT)
    1.68 +                        : (v_end >> PAGE_SHIFT))
    1.69 +                       - NR_SPECIAL_PAGES);
    1.70 +
    1.71 +    /* Paranoia: clean special pages. */
    1.72 +    for ( i = 0; i < NR_SPECIAL_PAGES; i++ )
    1.73 +        if ( xc_clear_domain_page(xc_handle, dom, special_page_nr + i) )
    1.74 +            goto error_out;
    1.75  
    1.76      /* Free the guard page that separates low RAM from special pages. */
    1.77      rc = xc_domain_memory_decrease_reservation(
    1.78 -            xc_handle, dom, 1, 0, &page_array[shared_page_nr-3]);
    1.79 +        xc_handle, dom, 1, 0, &page_array[special_page_nr]);
    1.80      if ( rc != 0 )
    1.81      {
    1.82          PERROR("Could not deallocate guard page for HVM guest.\n");
    1.83          goto error_out;
    1.84      }
    1.85  
    1.86 -    /* Paranoia: clean pages. */
    1.87 -    if ( xc_clear_domain_page(xc_handle, dom, shared_page_nr) ||
    1.88 -         xc_clear_domain_page(xc_handle, dom, shared_page_nr-1) ||
    1.89 -         xc_clear_domain_page(xc_handle, dom, shared_page_nr-2) )
    1.90 +    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN,
    1.91 +                     special_page_nr + SPECIALPAGE_XENSTORE);
    1.92 +    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN,
    1.93 +                     special_page_nr + SPECIALPAGE_BUFIOREQ);
    1.94 +    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN,
    1.95 +                     special_page_nr + SPECIALPAGE_IOREQ);
    1.96 +
    1.97 +    /*
    1.98 +     * Identity-map page table is required for running with CR0.PG=0 when
    1.99 +     * using Intel EPT. Create a 32-bit non-PAE page directory of superpages.
   1.100 +     */
   1.101 +    if ( (ident_pt = xc_map_foreign_range(
   1.102 +              xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
   1.103 +              special_page_nr + SPECIALPAGE_IDENT_PT)) == NULL )
   1.104          goto error_out;
   1.105 -
   1.106 -    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, shared_page_nr-1);
   1.107 -    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2);
   1.108 -    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr);
   1.109 +    for ( i = 0; i < PAGE_SIZE / sizeof(*ident_pt); i++ )
   1.110 +        ident_pt[i] = ((i << 22) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
   1.111 +                       _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
   1.112 +    munmap(ident_pt, PAGE_SIZE);
   1.113 +    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IDENT_PT,
   1.114 +                     special_page_nr + SPECIALPAGE_IDENT_PT);
   1.115  
   1.116      /* Insert JMP <rel32> instruction at address 0x0 to reach entry point. */
   1.117      entry_eip = elf_uval(&elf, elf.ehdr, e_entry);
     2.1 --- a/xen/arch/x86/domain.c	Tue Apr 08 11:41:27 2008 +0100
     2.2 +++ b/xen/arch/x86/domain.c	Wed Apr 09 11:30:32 2008 +0100
     2.3 @@ -503,13 +503,15 @@ int arch_domain_create(struct domain *d,
     2.4      HYPERVISOR_COMPAT_VIRT_START(d) = __HYPERVISOR_COMPAT_VIRT_START;
     2.5  #endif
     2.6  
     2.7 -    paging_domain_init(d);
     2.8 +    if ( (rc = paging_domain_init(d)) != 0 )
     2.9 +        goto fail;
    2.10      paging_initialised = 1;
    2.11  
    2.12      if ( !is_idle_domain(d) )
    2.13      {
    2.14          d->arch.ioport_caps = 
    2.15              rangeset_new(d, "I/O Ports", RANGESETF_prettyprint_hex);
    2.16 +        rc = -ENOMEM;
    2.17          if ( d->arch.ioport_caps == NULL )
    2.18              goto fail;
    2.19  
     3.1 --- a/xen/arch/x86/hvm/hvm.c	Tue Apr 08 11:41:27 2008 +0100
     3.2 +++ b/xen/arch/x86/hvm/hvm.c	Wed Apr 09 11:30:32 2008 +0100
     3.3 @@ -2212,6 +2212,33 @@ long do_hvm_op(unsigned long op, XEN_GUE
     3.4                  if ( a.value > HVMPTM_one_missed_tick_pending )
     3.5                      goto param_fail;
     3.6                  break;
     3.7 +            case HVM_PARAM_IDENT_PT:
     3.8 +                rc = -EPERM;
     3.9 +                if ( current->domain->domain_id != 0 )
    3.10 +                    goto param_fail;
    3.11 +
    3.12 +                rc = -EINVAL;
    3.13 +                if ( d->arch.hvm_domain.params[a.index] != 0 )
    3.14 +                    goto param_fail;
    3.15 +
    3.16 +                if ( !paging_mode_hap(d) )
    3.17 +                    break;
    3.18 +
    3.19 +                domain_pause(d);
    3.20 +
    3.21 +                /*
    3.22 +                 * Update GUEST_CR3 in each VMCS to point at identity map.
    3.23 +                 * All foreign updates to guest state must synchronise on
    3.24 +                 * the domctl_lock.
    3.25 +                 */
    3.26 +                spin_lock(&domctl_lock);
    3.27 +                d->arch.hvm_domain.params[a.index] = a.value;
    3.28 +                for_each_vcpu ( d, v )
    3.29 +                    paging_update_cr3(v);
    3.30 +                spin_unlock(&domctl_lock);
    3.31 +
    3.32 +                domain_unpause(d);
    3.33 +                break;
    3.34              }
    3.35              d->arch.hvm_domain.params[a.index] = a.value;
    3.36              rc = 0;
     4.1 --- a/xen/arch/x86/hvm/vmx/vmcs.c	Tue Apr 08 11:41:27 2008 +0100
     4.2 +++ b/xen/arch/x86/hvm/vmx/vmcs.c	Wed Apr 09 11:30:32 2008 +0100
     4.3 @@ -84,14 +84,16 @@ static void vmx_init_vmcs_config(void)
     4.4  
     4.5      min = (CPU_BASED_HLT_EXITING |
     4.6             CPU_BASED_INVLPG_EXITING |
     4.7 +           CPU_BASED_CR3_LOAD_EXITING |
     4.8 +           CPU_BASED_CR3_STORE_EXITING |
     4.9             CPU_BASED_MONITOR_EXITING |
    4.10             CPU_BASED_MWAIT_EXITING |
    4.11             CPU_BASED_MOV_DR_EXITING |
    4.12             CPU_BASED_ACTIVATE_IO_BITMAP |
    4.13             CPU_BASED_USE_TSC_OFFSETING);
    4.14 -    opt  = CPU_BASED_ACTIVATE_MSR_BITMAP;
    4.15 -    opt |= CPU_BASED_TPR_SHADOW;
    4.16 -    opt |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
    4.17 +    opt = (CPU_BASED_ACTIVATE_MSR_BITMAP |
    4.18 +           CPU_BASED_TPR_SHADOW |
    4.19 +           CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
    4.20      _vmx_cpu_based_exec_control = adjust_vmx_controls(
    4.21          min, opt, MSR_IA32_VMX_PROCBASED_CTLS);
    4.22  #ifdef __x86_64__
    4.23 @@ -107,11 +109,23 @@ static void vmx_init_vmcs_config(void)
    4.24      {
    4.25          min = 0;
    4.26          opt = (SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
    4.27 -               SECONDARY_EXEC_WBINVD_EXITING);
    4.28 +               SECONDARY_EXEC_WBINVD_EXITING |
    4.29 +               SECONDARY_EXEC_ENABLE_EPT);
    4.30          _vmx_secondary_exec_control = adjust_vmx_controls(
    4.31              min, opt, MSR_IA32_VMX_PROCBASED_CTLS2);
    4.32      }
    4.33  
    4.34 +    if ( _vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT )
    4.35 +    {
    4.36 +        /* To use EPT we expect to be able to clear certain intercepts. */
    4.37 +        uint32_t must_be_one, must_be_zero;
    4.38 +        rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, must_be_one, must_be_zero);
    4.39 +        if ( must_be_one & (CPU_BASED_INVLPG_EXITING |
    4.40 +                            CPU_BASED_CR3_LOAD_EXITING |
    4.41 +                            CPU_BASED_CR3_STORE_EXITING) )
    4.42 +            _vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
    4.43 +    }
    4.44 +
    4.45  #if defined(__i386__)
    4.46      /* If we can't virtualise APIC accesses, the TPR shadow is pointless. */
    4.47      if ( !(_vmx_secondary_exec_control &
    4.48 @@ -301,6 +315,8 @@ int vmx_cpu_up(void)
    4.49          return 0;
    4.50      }
    4.51  
    4.52 +    ept_sync_all();
    4.53 +
    4.54      return 1;
    4.55  }
    4.56  
    4.57 @@ -439,6 +455,7 @@ void vmx_disable_intercept_for_msr(struc
    4.58  
    4.59  static int construct_vmcs(struct vcpu *v)
    4.60  {
    4.61 +    struct domain *d = v->domain;
    4.62      uint16_t sysenter_cs;
    4.63      unsigned long sysenter_eip;
    4.64  
    4.65 @@ -448,10 +465,25 @@ static int construct_vmcs(struct vcpu *v
    4.66      __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
    4.67      __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
    4.68      __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
    4.69 -    __vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmx_cpu_based_exec_control);
    4.70 +
    4.71      v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control;
    4.72 -    if ( vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
    4.73 -        __vmwrite(SECONDARY_VM_EXEC_CONTROL, vmx_secondary_exec_control);
    4.74 +    v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control;
    4.75 +
    4.76 +    if ( paging_mode_hap(d) )
    4.77 +    {
    4.78 +        v->arch.hvm_vmx.exec_control &= ~(CPU_BASED_INVLPG_EXITING |
    4.79 +                                          CPU_BASED_CR3_LOAD_EXITING |
    4.80 +                                          CPU_BASED_CR3_STORE_EXITING);
    4.81 +    }
    4.82 +    else
    4.83 +    {
    4.84 +        v->arch.hvm_vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
    4.85 +    }
    4.86 +
    4.87 +    __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
    4.88 +    if ( cpu_has_vmx_secondary_exec_control )
    4.89 +        __vmwrite(SECONDARY_VM_EXEC_CONTROL,
    4.90 +                  v->arch.hvm_vmx.secondary_exec_control);
    4.91  
    4.92      /* MSR access bitmap. */
    4.93      if ( cpu_has_vmx_msr_bitmap )
    4.94 @@ -570,9 +602,10 @@ static int construct_vmcs(struct vcpu *v
    4.95      __vmwrite(VMCS_LINK_POINTER_HIGH, ~0UL);
    4.96  #endif
    4.97  
    4.98 -    __vmwrite(EXCEPTION_BITMAP, (HVM_TRAP_MASK |
    4.99 -                                 (1U << TRAP_page_fault) |
   4.100 -                                 (1U << TRAP_no_device)));
   4.101 +    __vmwrite(EXCEPTION_BITMAP,
   4.102 +              HVM_TRAP_MASK
   4.103 +              | (paging_mode_hap(d) ? 0 : (1U << TRAP_page_fault))
   4.104 +              | (1U << TRAP_no_device));
   4.105  
   4.106      v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_ET;
   4.107      hvm_update_guest_cr(v, 0);
   4.108 @@ -587,6 +620,15 @@ static int construct_vmcs(struct vcpu *v
   4.109          __vmwrite(TPR_THRESHOLD, 0);
   4.110      }
   4.111  
   4.112 +    if ( paging_mode_hap(d) )
   4.113 +    {
   4.114 +        __vmwrite(EPT_POINTER, d->arch.hvm_domain.vmx.ept_control.eptp);
   4.115 +#ifdef CONFIG_X86_PAE
   4.116 +        __vmwrite(EPT_POINTER_HIGH,
   4.117 +                  d->arch.hvm_domain.vmx.ept_control.eptp >> 32);
   4.118 +#endif
   4.119 +    }
   4.120 +
   4.121      vmx_vmcs_exit(v);
   4.122  
   4.123      paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */
   4.124 @@ -932,6 +974,8 @@ void vmcs_dump_vcpu(struct vcpu *v)
   4.125             (uint32_t)vmr(IDT_VECTORING_ERROR_CODE));
   4.126      printk("TPR Threshold = 0x%02x\n",
   4.127             (uint32_t)vmr(TPR_THRESHOLD));
   4.128 +    printk("EPT pointer = 0x%08x%08x\n",
   4.129 +           (uint32_t)vmr(EPT_POINTER_HIGH), (uint32_t)vmr(EPT_POINTER));
   4.130  
   4.131      vmx_vmcs_exit(v);
   4.132  }
     5.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Tue Apr 08 11:41:27 2008 +0100
     5.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Wed Apr 09 11:30:32 2008 +0100
     5.3 @@ -71,11 +71,17 @@ static void vmx_invlpg_intercept(unsigne
     5.4  
     5.5  static int vmx_domain_initialise(struct domain *d)
     5.6  {
     5.7 +    d->arch.hvm_domain.vmx.ept_control.etmt = EPT_DEFAULT_MT;
     5.8 +    d->arch.hvm_domain.vmx.ept_control.gaw  = EPT_DEFAULT_GAW;
     5.9 +    d->arch.hvm_domain.vmx.ept_control.asr  =
    5.10 +        pagetable_get_pfn(d->arch.phys_table);
    5.11 +
    5.12      return vmx_alloc_vlapic_mapping(d);
    5.13  }
    5.14  
    5.15  static void vmx_domain_destroy(struct domain *d)
    5.16  {
    5.17 +    ept_sync_domain(d);
    5.18      vmx_free_vlapic_mapping(d);
    5.19  }
    5.20  
    5.21 @@ -492,21 +498,24 @@ static int vmx_restore_cr0_cr3(
    5.22      unsigned long mfn = 0;
    5.23      p2m_type_t p2mt;
    5.24  
    5.25 -    if ( cr0 & X86_CR0_PG )
    5.26 +    if ( paging_mode_shadow(v->domain) )
    5.27      {
    5.28 -        mfn = mfn_x(gfn_to_mfn(v->domain, cr3 >> PAGE_SHIFT, &p2mt));
    5.29 -        if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
    5.30 +        if ( cr0 & X86_CR0_PG )
    5.31          {
    5.32 -            gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%lx\n", cr3);
    5.33 -            return -EINVAL;
    5.34 +            mfn = mfn_x(gfn_to_mfn(v->domain, cr3 >> PAGE_SHIFT, &p2mt));
    5.35 +            if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
    5.36 +            {
    5.37 +                gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%lx\n", cr3);
    5.38 +                return -EINVAL;
    5.39 +            }
    5.40          }
    5.41 +
    5.42 +        if ( hvm_paging_enabled(v) )
    5.43 +            put_page(pagetable_get_page(v->arch.guest_table));
    5.44 +
    5.45 +        v->arch.guest_table = pagetable_from_pfn(mfn);
    5.46      }
    5.47  
    5.48 -    if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG )
    5.49 -        put_page(pagetable_get_page(v->arch.guest_table));
    5.50 -
    5.51 -    v->arch.guest_table = pagetable_from_pfn(mfn);
    5.52 -
    5.53      v->arch.hvm_vcpu.guest_cr[0] = cr0 | X86_CR0_ET;
    5.54      v->arch.hvm_vcpu.guest_cr[3] = cr3;
    5.55  
    5.56 @@ -900,6 +909,56 @@ static void vmx_set_interrupt_shadow(str
    5.57      __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
    5.58  }
    5.59  
    5.60 +static void vmx_load_pdptrs(struct vcpu *v)
    5.61 +{
    5.62 +    unsigned long cr3 = v->arch.hvm_vcpu.guest_cr[3], mfn;
    5.63 +    uint64_t *guest_pdptrs;
    5.64 +    p2m_type_t p2mt;
    5.65 +    char *p;
    5.66 +
    5.67 +    /* EPT needs to load PDPTRS into VMCS for PAE. */
    5.68 +    if ( !hvm_pae_enabled(v) || (v->arch.hvm_vcpu.guest_efer & EFER_LMA) )
    5.69 +        return;
    5.70 +
    5.71 +    if ( cr3 & 0x1fUL )
    5.72 +        goto crash;
    5.73 +
    5.74 +    mfn = mfn_x(gfn_to_mfn(v->domain, cr3 >> PAGE_SHIFT, &p2mt));
    5.75 +    if ( !p2m_is_ram(p2mt) )
    5.76 +        goto crash;
    5.77 +
    5.78 +    p = map_domain_page(mfn);
    5.79 +
    5.80 +    guest_pdptrs = (uint64_t *)(p + (cr3 & ~PAGE_MASK));
    5.81 +
    5.82 +    /*
    5.83 +     * We do not check the PDPTRs for validity. The CPU will do this during
    5.84 +     * vm entry, and we can handle the failure there and crash the guest.
    5.85 +     * The only thing we could do better here is #GP instead.
    5.86 +     */
    5.87 +
    5.88 +    vmx_vmcs_enter(v);
    5.89 +
    5.90 +    __vmwrite(GUEST_PDPTR0, guest_pdptrs[0]);
    5.91 +    __vmwrite(GUEST_PDPTR1, guest_pdptrs[1]);
    5.92 +    __vmwrite(GUEST_PDPTR2, guest_pdptrs[2]);
    5.93 +    __vmwrite(GUEST_PDPTR3, guest_pdptrs[3]);
    5.94 +#ifdef CONFIG_X86_PAE
    5.95 +    __vmwrite(GUEST_PDPTR0_HIGH, guest_pdptrs[0] >> 32);
    5.96 +    __vmwrite(GUEST_PDPTR1_HIGH, guest_pdptrs[1] >> 32);
    5.97 +    __vmwrite(GUEST_PDPTR2_HIGH, guest_pdptrs[2] >> 32);
    5.98 +    __vmwrite(GUEST_PDPTR3_HIGH, guest_pdptrs[3] >> 32);
    5.99 +#endif
   5.100 +
   5.101 +    vmx_vmcs_exit(v);
   5.102 +
   5.103 +    unmap_domain_page(p);
   5.104 +    return;
   5.105 +
   5.106 + crash:
   5.107 +    domain_crash(v->domain);
   5.108 +}
   5.109 +
   5.110  static void vmx_update_host_cr3(struct vcpu *v)
   5.111  {
   5.112      vmx_vmcs_enter(v);
   5.113 @@ -915,7 +974,24 @@ static void vmx_update_guest_cr(struct v
   5.114      {
   5.115      case 0: {
   5.116          unsigned long hw_cr0_mask =
   5.117 -            X86_CR0_NE | X86_CR0_PG | X86_CR0_WP | X86_CR0_PE;
   5.118 +            X86_CR0_NE | X86_CR0_PG | X86_CR0_PE;
   5.119 +
   5.120 +        if ( paging_mode_shadow(v->domain) )
   5.121 +           hw_cr0_mask |= X86_CR0_WP;
   5.122 +
   5.123 +        if ( paging_mode_hap(v->domain) )
   5.124 +        {
   5.125 +            /* We manage GUEST_CR3 when guest CR0.PE is zero. */
   5.126 +            uint32_t cr3_ctls = (CPU_BASED_CR3_LOAD_EXITING |
   5.127 +                                 CPU_BASED_CR3_STORE_EXITING);
   5.128 +            v->arch.hvm_vmx.exec_control &= ~cr3_ctls;
   5.129 +            if ( !hvm_paging_enabled(v) )
   5.130 +                v->arch.hvm_vmx.exec_control |= cr3_ctls;
   5.131 +            __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
   5.132 +
   5.133 +            /* Changing CR0.PE can change some bits in real CR4. */
   5.134 +            vmx_update_guest_cr(v, 4);
   5.135 +        }
   5.136  
   5.137          if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
   5.138          {
   5.139 @@ -939,11 +1015,26 @@ static void vmx_update_guest_cr(struct v
   5.140          /* CR2 is updated in exit stub. */
   5.141          break;
   5.142      case 3:
   5.143 +        if ( paging_mode_hap(v->domain) )
   5.144 +        {
   5.145 +            if ( !hvm_paging_enabled(v) )
   5.146 +                v->arch.hvm_vcpu.hw_cr[3] =
   5.147 +                    v->domain->arch.hvm_domain.params[HVM_PARAM_IDENT_PT];
   5.148 +            vmx_load_pdptrs(v);
   5.149 +        }
   5.150 + 
   5.151          __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr[3]);
   5.152          break;
   5.153      case 4:
   5.154 -        v->arch.hvm_vcpu.hw_cr[4] =
   5.155 -            v->arch.hvm_vcpu.guest_cr[4] | HVM_CR4_HOST_MASK;
   5.156 +        v->arch.hvm_vcpu.hw_cr[4] = HVM_CR4_HOST_MASK;
   5.157 +        if ( paging_mode_hap(v->domain) )
   5.158 +            v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE;
   5.159 +        v->arch.hvm_vcpu.hw_cr[4] |= v->arch.hvm_vcpu.guest_cr[4];
   5.160 +        if ( paging_mode_hap(v->domain) && !hvm_paging_enabled(v) )
   5.161 +        {
   5.162 +            v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_PSE;
   5.163 +            v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE;
   5.164 +        }
   5.165          __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]);
   5.166          __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]);
   5.167          break;
   5.168 @@ -983,7 +1074,18 @@ static void vmx_flush_guest_tlbs(void)
   5.169       * because VMRESUME will flush it for us. */
   5.170  }
   5.171  
   5.172 +static void __ept_sync_domain(void *info)
   5.173 +{
   5.174 +    struct domain *d = info;
   5.175 +    __invept(1, d->arch.hvm_domain.vmx.ept_control.eptp, 0);
   5.176 +}
   5.177  
   5.178 +void ept_sync_domain(struct domain *d)
   5.179 +{
   5.180 +    /* Only if using EPT and this domain has some VCPUs to dirty. */
   5.181 +    if ( d->arch.hvm_domain.hap_enabled && d->vcpu[0] )
   5.182 +        on_each_cpu(__ept_sync_domain, d, 1, 1);
   5.183 +}
   5.184  
   5.185  static void __vmx_inject_exception(
   5.186      struct vcpu *v, int trap, int type, int error_code)
   5.187 @@ -1133,6 +1235,12 @@ void start_vmx(void)
   5.188          return;
   5.189      }
   5.190  
   5.191 +    if ( cpu_has_vmx_ept )
   5.192 +    {
   5.193 +        printk("VMX: EPT is available.\n");
   5.194 +        vmx_function_table.hap_supported = 1;
   5.195 +    }
   5.196 +
   5.197      setup_vmcs_dump();
   5.198  
   5.199      hvm_enable(&vmx_function_table);
   5.200 @@ -1635,14 +1743,14 @@ static int vmx_alloc_vlapic_mapping(stru
   5.201      share_xen_page_with_guest(virt_to_page(apic_va), d, XENSHARE_writable);
   5.202      set_mmio_p2m_entry(
   5.203          d, paddr_to_pfn(APIC_DEFAULT_PHYS_BASE), _mfn(virt_to_mfn(apic_va)));
   5.204 -    d->arch.hvm_domain.vmx_apic_access_mfn = virt_to_mfn(apic_va);
   5.205 +    d->arch.hvm_domain.vmx.apic_access_mfn = virt_to_mfn(apic_va);
   5.206  
   5.207      return 0;
   5.208  }
   5.209  
   5.210  static void vmx_free_vlapic_mapping(struct domain *d)
   5.211  {
   5.212 -    unsigned long mfn = d->arch.hvm_domain.vmx_apic_access_mfn;
   5.213 +    unsigned long mfn = d->arch.hvm_domain.vmx.apic_access_mfn;
   5.214      if ( mfn != 0 )
   5.215          free_xenheap_page(mfn_to_virt(mfn));
   5.216  }
   5.217 @@ -1655,7 +1763,7 @@ static void vmx_install_vlapic_mapping(s
   5.218          return;
   5.219  
   5.220      virt_page_ma = page_to_maddr(vcpu_vlapic(v)->regs_page);
   5.221 -    apic_page_ma = v->domain->arch.hvm_domain.vmx_apic_access_mfn;
   5.222 +    apic_page_ma = v->domain->arch.hvm_domain.vmx.apic_access_mfn;
   5.223      apic_page_ma <<= PAGE_SHIFT;
   5.224  
   5.225      vmx_vmcs_enter(v);
   5.226 @@ -1900,6 +2008,17 @@ static void vmx_wbinvd_intercept(void)
   5.227          wbinvd();
   5.228  }
   5.229  
   5.230 +static void ept_handle_violation(unsigned long qualification, paddr_t gpa)
   5.231 +{
   5.232 +    if ( unlikely(((qualification >> 7) & 0x3) != 0x3) )
   5.233 +    {
   5.234 +        domain_crash(current->domain);
   5.235 +        return;
   5.236 +    }
   5.237 +
   5.238 +    handle_mmio();
   5.239 +}
   5.240 +
   5.241  static void vmx_failed_vmentry(unsigned int exit_reason,
   5.242                                 struct cpu_user_regs *regs)
   5.243  {
   5.244 @@ -1939,6 +2058,10 @@ asmlinkage void vmx_vmexit_handler(struc
   5.245      unsigned long exit_qualification, inst_len = 0;
   5.246      struct vcpu *v = current;
   5.247  
   5.248 +    if ( paging_mode_hap(v->domain) && hvm_paging_enabled(v) )
   5.249 +        v->arch.hvm_vcpu.guest_cr[3] = v->arch.hvm_vcpu.hw_cr[3] =
   5.250 +            __vmread(GUEST_CR3);
   5.251 +
   5.252      exit_reason = __vmread(VM_EXIT_REASON);
   5.253  
   5.254      hvmtrace_vmexit(v, regs->eip, exit_reason);
   5.255 @@ -2171,6 +2294,17 @@ asmlinkage void vmx_vmexit_handler(struc
   5.256          break;
   5.257      }
   5.258  
   5.259 +    case EXIT_REASON_EPT_VIOLATION:
   5.260 +    {
   5.261 +        paddr_t gpa = __vmread(GUEST_PHYSICAL_ADDRESS);
   5.262 +#ifdef CONFIG_X86_PAE
   5.263 +        gpa |= (paddr_t)__vmread(GUEST_PHYSICAL_ADDRESS_HIGH) << 32;
   5.264 +#endif
   5.265 +        exit_qualification = __vmread(EXIT_QUALIFICATION);
   5.266 +        ept_handle_violation(exit_qualification, gpa);
   5.267 +        break;
   5.268 +    }
   5.269 +
   5.270      default:
   5.271      exit_and_crash:
   5.272          gdprintk(XENLOG_ERR, "Bad vmexit (reason %x)\n", exit_reason);
     6.1 --- a/xen/arch/x86/mm.c	Tue Apr 08 11:41:27 2008 +0100
     6.2 +++ b/xen/arch/x86/mm.c	Wed Apr 09 11:30:32 2008 +0100
     6.3 @@ -299,7 +299,7 @@ int memory_is_conventional_ram(paddr_t p
     6.4  unsigned long domain_get_maximum_gpfn(struct domain *d)
     6.5  {
     6.6      if ( is_hvm_domain(d) )
     6.7 -        return d->arch.p2m.max_mapped_pfn;
     6.8 +        return d->arch.p2m->max_mapped_pfn;
     6.9      /* NB. PV guests specify nr_pfns rather than max_pfn so we adjust here. */
    6.10      return arch_get_max_pfn(d) - 1;
    6.11  }
     7.1 --- a/xen/arch/x86/mm/hap/Makefile	Tue Apr 08 11:41:27 2008 +0100
     7.2 +++ b/xen/arch/x86/mm/hap/Makefile	Wed Apr 09 11:30:32 2008 +0100
     7.3 @@ -2,6 +2,7 @@ obj-y += hap.o
     7.4  obj-y += guest_walk_2level.o
     7.5  obj-y += guest_walk_3level.o
     7.6  obj-y += guest_walk_4level.o
     7.7 +obj-y += p2m-ept.o
     7.8  
     7.9  guest_levels  = $(subst level,,$(filter %level,$(subst ., ,$(subst _, ,$(1)))))
    7.10  guest_walk_defns = -DGUEST_PAGING_LEVELS=$(call guest_levels,$(1))
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/xen/arch/x86/mm/hap/p2m-ept.c	Wed Apr 09 11:30:32 2008 +0100
     8.3 @@ -0,0 +1,187 @@
     8.4 +/*
     8.5 + * p2m-ept.c: use the EPT page table as p2m
     8.6 + * Copyright (c) 2007, Intel Corporation.
     8.7 + *
     8.8 + * This program is free software; you can redistribute it and/or modify it
     8.9 + * under the terms and conditions of the GNU General Public License,
    8.10 + * version 2, as published by the Free Software Foundation.
    8.11 + *
    8.12 + * This program is distributed in the hope it will be useful, but WITHOUT
    8.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    8.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
    8.15 + * more details.
    8.16 + *
    8.17 + * You should have received a copy of the GNU General Public License along with
    8.18 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
    8.19 + * Place - Suite 330, Boston, MA 02111-1307 USA.
    8.20 + */
    8.21 +
    8.22 +#include <xen/config.h>
    8.23 +#include <xen/domain_page.h>
    8.24 +#include <xen/sched.h>
    8.25 +#include <asm/current.h>
    8.26 +#include <asm/types.h>
    8.27 +#include <asm/domain.h>
    8.28 +#include <asm/p2m.h>
    8.29 +#include <asm/hvm/vmx/vmx.h>
    8.30 +#include <xen/iommu.h>
    8.31 +
    8.32 +static int ept_next_level(struct domain *d, bool_t read_only,
    8.33 +                          ept_entry_t **table, unsigned long *gfn_remainder,
    8.34 +                          u32 shift)
    8.35 +{
    8.36 +    ept_entry_t *ept_entry, *next;
    8.37 +    u32 index;
    8.38 +
    8.39 +    index = *gfn_remainder >> shift;
    8.40 +    *gfn_remainder &= (1UL << shift) - 1;
    8.41 +
    8.42 +    ept_entry = (*table) + index;
    8.43 +
    8.44 +    if ( !(ept_entry->epte & 0x7) )
    8.45 +    {
    8.46 +        struct page_info *pg;
    8.47 +
    8.48 +        if ( read_only )
    8.49 +            return 0;
    8.50 +
    8.51 +        pg = d->arch.p2m->alloc_page(d);
    8.52 +        if ( pg == NULL )
    8.53 +            return 0;
    8.54 +
    8.55 +        pg->count_info = 1;
    8.56 +        pg->u.inuse.type_info = 1 | PGT_validated;
    8.57 +        list_add_tail(&pg->list, &d->arch.p2m->pages);
    8.58 +
    8.59 +        ept_entry->emt = 0;
    8.60 +        ept_entry->sp_avail = 0;
    8.61 +        ept_entry->avail1 = 0;
    8.62 +        ept_entry->mfn = page_to_mfn(pg);
    8.63 +        ept_entry->rsvd = 0;
    8.64 +        ept_entry->avail2 = 0;
    8.65 +        /* last step */
    8.66 +        ept_entry->r = ept_entry->w = ept_entry->x = 1;
    8.67 +    }
    8.68 +
    8.69 +    next = map_domain_page(ept_entry->mfn);
    8.70 +    unmap_domain_page(*table);
    8.71 +    *table = next;
    8.72 +
    8.73 +    return 1;
    8.74 +}
    8.75 +
    8.76 +static int
    8.77 +ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
    8.78 +{
    8.79 +    ept_entry_t *table =
    8.80 +        map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
    8.81 +    unsigned long gfn_remainder = gfn;
    8.82 +    ept_entry_t *ept_entry = NULL;
    8.83 +    u32 index;
    8.84 +    int i, rv = 0;
    8.85 +
    8.86 +    /* Should check if gfn obeys GAW here */
    8.87 +
    8.88 +    for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
    8.89 +        if ( !ept_next_level(d, 0, &table, &gfn_remainder,
    8.90 +                             i * EPT_TABLE_ORDER) )
    8.91 +            goto out;
    8.92 +
    8.93 +    index = gfn_remainder;
    8.94 +    ept_entry = table + index;
    8.95 +
    8.96 +    if ( mfn_valid(mfn_x(mfn)) || (p2mt == p2m_mmio_direct) )
    8.97 +    {
    8.98 +        /* Track the highest gfn for which we have ever had a valid mapping */
    8.99 +        if ( gfn > d->arch.p2m->max_mapped_pfn )
   8.100 +            d->arch.p2m->max_mapped_pfn = gfn;
   8.101 +
   8.102 +        ept_entry->emt = EPT_DEFAULT_MT;
   8.103 +        ept_entry->sp_avail = 0;
   8.104 +        ept_entry->avail1 = p2mt;
   8.105 +        ept_entry->mfn = mfn_x(mfn);
   8.106 +        ept_entry->rsvd = 0;
   8.107 +        ept_entry->avail2 = 0;
   8.108 +        /* last step */
   8.109 +        ept_entry->r = ept_entry->w = ept_entry->x = 1;
   8.110 +    }
   8.111 +    else
   8.112 +        ept_entry->epte = 0;
   8.113 +
   8.114 +    /* Success */
   8.115 +    rv = 1;
   8.116 +
   8.117 + out:
   8.118 +    unmap_domain_page(table);
   8.119 +
   8.120 +    ept_sync_domain(d);
   8.121 +
   8.122 +    /* If p2m table is shared with vtd page-table. */
   8.123 +    if ( iommu_enabled && is_hvm_domain(d) && (p2mt == p2m_mmio_direct) )
   8.124 +        iommu_flush(d, gfn, (u64*)ept_entry);
   8.125 +
   8.126 +    return rv;
   8.127 +}
   8.128 +
   8.129 +/* Read ept p2m entries */
   8.130 +static mfn_t ept_get_entry(struct domain *d, unsigned long gfn, p2m_type_t *t)
   8.131 +{
   8.132 +    ept_entry_t *table =
   8.133 +        map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
   8.134 +    unsigned long gfn_remainder = gfn;
   8.135 +    ept_entry_t *ept_entry;
   8.136 +    u32 index;
   8.137 +    int i;
   8.138 +    mfn_t mfn = _mfn(INVALID_MFN);
   8.139 +
   8.140 +    *t = p2m_mmio_dm;
   8.141 +
   8.142 +    /* This pfn is higher than the highest the p2m map currently holds */
   8.143 +    if ( gfn > d->arch.p2m->max_mapped_pfn )
   8.144 +        goto out;
   8.145 +
   8.146 +    /* Should check if gfn obeys GAW here. */
   8.147 +
   8.148 +    for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
   8.149 +        if ( !ept_next_level(d, 1, &table, &gfn_remainder,
   8.150 +                             i * EPT_TABLE_ORDER) )
   8.151 +            goto out;
   8.152 +
   8.153 +    index = gfn_remainder;
   8.154 +    ept_entry = table + index;
   8.155 +
   8.156 +    if ( (ept_entry->epte & 0x7) == 0x7 )
   8.157 +    {
   8.158 +        if ( ept_entry->avail1 != p2m_invalid )
   8.159 +        {
   8.160 +            *t = ept_entry->avail1;
   8.161 +            mfn = _mfn(ept_entry->mfn);
   8.162 +        }
   8.163 +    }
   8.164 +
   8.165 + out:
   8.166 +    unmap_domain_page(table);
   8.167 +    return mfn;
   8.168 +}
   8.169 +
   8.170 +static mfn_t ept_get_entry_current(unsigned long gfn, p2m_type_t *t)
   8.171 +{
   8.172 +    return ept_get_entry(current->domain, gfn, t);
   8.173 +}
   8.174 +
   8.175 +void ept_p2m_init(struct domain *d)
   8.176 +{
   8.177 +    d->arch.p2m->set_entry = ept_set_entry;
   8.178 +    d->arch.p2m->get_entry = ept_get_entry;
   8.179 +    d->arch.p2m->get_entry_current = ept_get_entry_current;
   8.180 +}
   8.181 +
   8.182 +/*
   8.183 + * Local variables:
   8.184 + * mode: C
   8.185 + * c-set-style: "BSD"
   8.186 + * c-basic-offset: 4
   8.187 + * tab-width: 4
   8.188 + * indent-tabs-mode: nil
   8.189 + * End:
   8.190 + */
     9.1 --- a/xen/arch/x86/mm/p2m.c	Tue Apr 08 11:41:27 2008 +0100
     9.2 +++ b/xen/arch/x86/mm/p2m.c	Wed Apr 09 11:30:32 2008 +0100
     9.3 @@ -27,6 +27,7 @@
     9.4  #include <asm/page.h>
     9.5  #include <asm/paging.h>
     9.6  #include <asm/p2m.h>
     9.7 +#include <asm/hvm/vmx/vmx.h> /* ept_p2m_init() */
     9.8  #include <xen/iommu.h>
     9.9  
    9.10  /* Debugging and auditing of the P2M code? */
    9.11 @@ -41,37 +42,36 @@
    9.12   * Locking discipline: always acquire this lock before the shadow or HAP one
    9.13   */
    9.14  
    9.15 -#define p2m_lock_init(_d)                            \
    9.16 -    do {                                             \
    9.17 -        spin_lock_init(&(_d)->arch.p2m.lock);        \
    9.18 -        (_d)->arch.p2m.locker = -1;                  \
    9.19 -        (_d)->arch.p2m.locker_function = "nobody";   \
    9.20 +#define p2m_lock_init(_p2m)                     \
    9.21 +    do {                                        \
    9.22 +        spin_lock_init(&(_p2m)->lock);          \
    9.23 +        (_p2m)->locker = -1;                    \
    9.24 +        (_p2m)->locker_function = "nobody";     \
    9.25      } while (0)
    9.26  
    9.27 -#define p2m_lock(_d)                                                \
    9.28 -    do {                                                            \
    9.29 -        if ( unlikely((_d)->arch.p2m.locker == current->processor) )\
    9.30 -        {                                                           \
    9.31 -            printk("Error: p2m lock held by %s\n",                  \
    9.32 -                   (_d)->arch.p2m.locker_function);                 \
    9.33 -            BUG();                                                  \
    9.34 -        }                                                           \
    9.35 -        spin_lock(&(_d)->arch.p2m.lock);                            \
    9.36 -        ASSERT((_d)->arch.p2m.locker == -1);                        \
    9.37 -        (_d)->arch.p2m.locker = current->processor;                 \
    9.38 -        (_d)->arch.p2m.locker_function = __func__;                  \
    9.39 +#define p2m_lock(_p2m)                                          \
    9.40 +    do {                                                        \
    9.41 +        if ( unlikely((_p2m)->locker == current->processor) )   \
    9.42 +        {                                                       \
    9.43 +            printk("Error: p2m lock held by %s\n",              \
    9.44 +                   (_p2m)->locker_function);                    \
    9.45 +            BUG();                                              \
    9.46 +        }                                                       \
    9.47 +        spin_lock(&(_p2m)->lock);                               \
    9.48 +        ASSERT((_p2m)->locker == -1);                           \
    9.49 +        (_p2m)->locker = current->processor;                    \
    9.50 +        (_p2m)->locker_function = __func__;                     \
    9.51      } while (0)
    9.52  
    9.53 -#define p2m_unlock(_d)                                              \
    9.54 -    do {                                                            \
    9.55 -        ASSERT((_d)->arch.p2m.locker == current->processor); \
    9.56 -        (_d)->arch.p2m.locker = -1;                          \
    9.57 -        (_d)->arch.p2m.locker_function = "nobody";           \
    9.58 -        spin_unlock(&(_d)->arch.p2m.lock);                   \
    9.59 +#define p2m_unlock(_p2m)                                \
    9.60 +    do {                                                \
    9.61 +        ASSERT((_p2m)->locker == current->processor);   \
    9.62 +        (_p2m)->locker = -1;                            \
    9.63 +        (_p2m)->locker_function = "nobody";             \
    9.64 +        spin_unlock(&(_p2m)->lock);                     \
    9.65      } while (0)
    9.66  
    9.67  
    9.68 -
    9.69  /* Printouts */
    9.70  #define P2M_PRINTK(_f, _a...)                                \
    9.71      debugtrace_printk("p2m: %s(): " _f, __func__, ##_a)
    9.72 @@ -152,7 +152,7 @@ p2m_next_level(struct domain *d, mfn_t *
    9.73      l1_pgentry_t *p2m_entry;
    9.74      l1_pgentry_t new_entry;
    9.75      void *next;
    9.76 -    ASSERT(d->arch.p2m.alloc_page);
    9.77 +    ASSERT(d->arch.p2m->alloc_page);
    9.78  
    9.79      if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
    9.80                                        shift, max)) )
    9.81 @@ -160,10 +160,10 @@ p2m_next_level(struct domain *d, mfn_t *
    9.82  
    9.83      if ( !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) )
    9.84      {
    9.85 -        struct page_info *pg = d->arch.p2m.alloc_page(d);
    9.86 +        struct page_info *pg = d->arch.p2m->alloc_page(d);
    9.87          if ( pg == NULL )
    9.88              return 0;
    9.89 -        list_add_tail(&pg->list, &d->arch.p2m.pages);
    9.90 +        list_add_tail(&pg->list, &d->arch.p2m->pages);
    9.91          pg->u.inuse.type_info = type | 1 | PGT_validated;
    9.92          pg->count_info = 1;
    9.93  
    9.94 @@ -202,7 +202,7 @@ p2m_next_level(struct domain *d, mfn_t *
    9.95  
    9.96  // Returns 0 on error (out of memory)
    9.97  static int
    9.98 -set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
    9.99 +p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
   9.100  {
   9.101      // XXX -- this might be able to be faster iff current->domain == d
   9.102      mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
   9.103 @@ -244,8 +244,8 @@ set_p2m_entry(struct domain *d, unsigned
   9.104      ASSERT(p2m_entry);
   9.105  
   9.106      /* Track the highest gfn for which we have ever had a valid mapping */
   9.107 -    if ( mfn_valid(mfn) && (gfn > d->arch.p2m.max_mapped_pfn) )
   9.108 -        d->arch.p2m.max_mapped_pfn = gfn;
   9.109 +    if ( mfn_valid(mfn) && (gfn > d->arch.p2m->max_mapped_pfn) )
   9.110 +        d->arch.p2m->max_mapped_pfn = gfn;
   9.111  
   9.112      if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
   9.113          entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
   9.114 @@ -279,135 +279,8 @@ set_p2m_entry(struct domain *d, unsigned
   9.115      return rv;
   9.116  }
   9.117  
   9.118 -
   9.119 -/* Init the datastructures for later use by the p2m code */
   9.120 -void p2m_init(struct domain *d)
   9.121 -{
   9.122 -    p2m_lock_init(d);
   9.123 -    INIT_LIST_HEAD(&d->arch.p2m.pages);
   9.124 -}
   9.125 -
   9.126 -
   9.127 -// Allocate a new p2m table for a domain.
   9.128 -//
   9.129 -// The structure of the p2m table is that of a pagetable for xen (i.e. it is
   9.130 -// controlled by CONFIG_PAGING_LEVELS).
   9.131 -//
   9.132 -// The alloc_page and free_page functions will be used to get memory to
   9.133 -// build the p2m, and to release it again at the end of day.
   9.134 -//
   9.135 -// Returns 0 for success or -errno.
   9.136 -//
   9.137 -int p2m_alloc_table(struct domain *d,
   9.138 -                    struct page_info * (*alloc_page)(struct domain *d),
   9.139 -                    void (*free_page)(struct domain *d, struct page_info *pg))
   9.140 -
   9.141 -{
   9.142 -    mfn_t mfn = _mfn(INVALID_MFN);
   9.143 -    struct list_head *entry;
   9.144 -    struct page_info *page, *p2m_top;
   9.145 -    unsigned int page_count = 0;
   9.146 -    unsigned long gfn = -1UL;
   9.147 -
   9.148 -    p2m_lock(d);
   9.149 -
   9.150 -    if ( pagetable_get_pfn(d->arch.phys_table) != 0 )
   9.151 -    {
   9.152 -        P2M_ERROR("p2m already allocated for this domain\n");
   9.153 -        p2m_unlock(d);
   9.154 -        return -EINVAL;
   9.155 -    }
   9.156 -
   9.157 -    P2M_PRINTK("allocating p2m table\n");
   9.158 -
   9.159 -    d->arch.p2m.alloc_page = alloc_page;
   9.160 -    d->arch.p2m.free_page = free_page;
   9.161 -
   9.162 -    p2m_top = d->arch.p2m.alloc_page(d);
   9.163 -    if ( p2m_top == NULL )
   9.164 -    {
   9.165 -        p2m_unlock(d);
   9.166 -        return -ENOMEM;
   9.167 -    }
   9.168 -    list_add_tail(&p2m_top->list, &d->arch.p2m.pages);
   9.169 -
   9.170 -    p2m_top->count_info = 1;
   9.171 -    p2m_top->u.inuse.type_info =
   9.172 -#if CONFIG_PAGING_LEVELS == 4
   9.173 -        PGT_l4_page_table
   9.174 -#elif CONFIG_PAGING_LEVELS == 3
   9.175 -        PGT_l3_page_table
   9.176 -#elif CONFIG_PAGING_LEVELS == 2
   9.177 -        PGT_l2_page_table
   9.178 -#endif
   9.179 -        | 1 | PGT_validated;
   9.180 -
   9.181 -    d->arch.phys_table = pagetable_from_mfn(page_to_mfn(p2m_top));
   9.182 -
   9.183 -    P2M_PRINTK("populating p2m table\n");
   9.184 -
   9.185 -    /* Initialise physmap tables for slot zero. Other code assumes this. */
   9.186 -    if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), p2m_invalid) )
   9.187 -        goto error;
   9.188 -
   9.189 -    /* Copy all existing mappings from the page list and m2p */
   9.190 -    for ( entry = d->page_list.next;
   9.191 -          entry != &d->page_list;
   9.192 -          entry = entry->next )
   9.193 -    {
   9.194 -        page = list_entry(entry, struct page_info, list);
   9.195 -        mfn = page_to_mfn(page);
   9.196 -        gfn = get_gpfn_from_mfn(mfn_x(mfn));
   9.197 -        page_count++;
   9.198 -        if (
   9.199 -#ifdef __x86_64__
   9.200 -            (gfn != 0x5555555555555555L)
   9.201 -#else
   9.202 -            (gfn != 0x55555555L)
   9.203 -#endif
   9.204 -             && gfn != INVALID_M2P_ENTRY
   9.205 -            && !set_p2m_entry(d, gfn, mfn, p2m_ram_rw) )
   9.206 -            goto error;
   9.207 -    }
   9.208 -
   9.209 -#if CONFIG_PAGING_LEVELS >= 3
   9.210 -    if (vtd_enabled && is_hvm_domain(d))
   9.211 -        iommu_set_pgd(d);
   9.212 -#endif
   9.213 -
   9.214 -    P2M_PRINTK("p2m table initialised (%u pages)\n", page_count);
   9.215 -    p2m_unlock(d);
   9.216 -    return 0;
   9.217 -
   9.218 - error:
   9.219 -    P2M_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%"
   9.220 -               PRI_mfn "\n", gfn, mfn_x(mfn));
   9.221 -    p2m_unlock(d);
   9.222 -    return -ENOMEM;
   9.223 -}
   9.224 -
   9.225 -void p2m_teardown(struct domain *d)
   9.226 -/* Return all the p2m pages to Xen.
   9.227 - * We know we don't have any extra mappings to these pages */
   9.228 -{
   9.229 -    struct list_head *entry, *n;
   9.230 -    struct page_info *pg;
   9.231 -
   9.232 -    p2m_lock(d);
   9.233 -    d->arch.phys_table = pagetable_null();
   9.234 -
   9.235 -    list_for_each_safe(entry, n, &d->arch.p2m.pages)
   9.236 -    {
   9.237 -        pg = list_entry(entry, struct page_info, list);
   9.238 -        list_del(entry);
   9.239 -        d->arch.p2m.free_page(d, pg);
   9.240 -    }
   9.241 -    p2m_unlock(d);
   9.242 -}
   9.243 -
   9.244 -mfn_t
   9.245 -gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t)
   9.246 -/* Read another domain's p2m entries */
   9.247 +static mfn_t
   9.248 +p2m_gfn_to_mfn(struct domain *d, unsigned long gfn, p2m_type_t *t)
   9.249  {
   9.250      mfn_t mfn;
   9.251      paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
   9.252 @@ -424,7 +297,7 @@ gfn_to_mfn_foreign(struct domain *d, uns
   9.253  
   9.254      mfn = pagetable_get_mfn(d->arch.phys_table);
   9.255  
   9.256 -    if ( gfn > d->arch.p2m.max_mapped_pfn )
   9.257 +    if ( gfn > d->arch.p2m->max_mapped_pfn )
   9.258          /* This pfn is higher than the highest the p2m map currently holds */
   9.259          return _mfn(INVALID_MFN);
   9.260  
   9.261 @@ -489,6 +362,201 @@ gfn_to_mfn_foreign(struct domain *d, uns
   9.262      return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
   9.263  }
   9.264  
   9.265 +/* Read the current domain's p2m table (through the linear mapping). */
   9.266 +static mfn_t p2m_gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t)
   9.267 +{
   9.268 +    mfn_t mfn = _mfn(INVALID_MFN);
   9.269 +    p2m_type_t p2mt = p2m_mmio_dm;
   9.270 +    /* XXX This is for compatibility with the old model, where anything not 
   9.271 +     * XXX marked as RAM was considered to be emulated MMIO space.
   9.272 +     * XXX Once we start explicitly registering MMIO regions in the p2m 
   9.273 +     * XXX we will return p2m_invalid for unmapped gfns */
   9.274 +
   9.275 +    if ( gfn <= current->domain->arch.p2m->max_mapped_pfn )
   9.276 +    {
   9.277 +        l1_pgentry_t l1e = l1e_empty();
   9.278 +        int ret;
   9.279 +
   9.280 +        ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) 
   9.281 +               / sizeof(l1_pgentry_t));
   9.282 +
   9.283 +        /* Need to __copy_from_user because the p2m is sparse and this
   9.284 +         * part might not exist */
   9.285 +        ret = __copy_from_user(&l1e,
   9.286 +                               &phys_to_machine_mapping[gfn],
   9.287 +                               sizeof(l1e));
   9.288 +
   9.289 +        if ( ret == 0 ) {
   9.290 +            p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
   9.291 +            ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
   9.292 +            if ( p2m_is_valid(p2mt) )
   9.293 +                mfn = _mfn(l1e_get_pfn(l1e));
   9.294 +            else 
   9.295 +                /* XXX see above */
   9.296 +                p2mt = p2m_mmio_dm;
   9.297 +        }
   9.298 +    }
   9.299 +
   9.300 +    *t = p2mt;
   9.301 +    return mfn;
   9.302 +}
   9.303 +
   9.304 +/* Init the datastructures for later use by the p2m code */
   9.305 +int p2m_init(struct domain *d)
   9.306 +{
   9.307 +    struct p2m_domain *p2m;
   9.308 +
   9.309 +    p2m = xmalloc(struct p2m_domain);
   9.310 +    if ( p2m == NULL )
   9.311 +        return -ENOMEM;
   9.312 +
   9.313 +    d->arch.p2m = p2m;
   9.314 +
   9.315 +    p2m_lock_init(p2m);
   9.316 +    INIT_LIST_HEAD(&p2m->pages);
   9.317 +
   9.318 +    p2m->set_entry = p2m_set_entry;
   9.319 +    p2m->get_entry = p2m_gfn_to_mfn;
   9.320 +    p2m->get_entry_current = p2m_gfn_to_mfn_current;
   9.321 +
   9.322 +    if ( is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled &&
   9.323 +         (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) )
   9.324 +        ept_p2m_init(d);
   9.325 +
   9.326 +    return 0;
   9.327 +}
   9.328 +
   9.329 +static inline
   9.330 +int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
   9.331 +{
   9.332 +    return d->arch.p2m->set_entry(d, gfn, mfn, p2mt);
   9.333 +}
   9.334 +
   9.335 +// Allocate a new p2m table for a domain.
   9.336 +//
   9.337 +// The structure of the p2m table is that of a pagetable for xen (i.e. it is
   9.338 +// controlled by CONFIG_PAGING_LEVELS).
   9.339 +//
   9.340 +// The alloc_page and free_page functions will be used to get memory to
   9.341 +// build the p2m, and to release it again at the end of day.
   9.342 +//
   9.343 +// Returns 0 for success or -errno.
   9.344 +//
   9.345 +int p2m_alloc_table(struct domain *d,
   9.346 +                    struct page_info * (*alloc_page)(struct domain *d),
   9.347 +                    void (*free_page)(struct domain *d, struct page_info *pg))
   9.348 +
   9.349 +{
   9.350 +    mfn_t mfn = _mfn(INVALID_MFN);
   9.351 +    struct list_head *entry;
   9.352 +    struct page_info *page, *p2m_top;
   9.353 +    unsigned int page_count = 0;
   9.354 +    unsigned long gfn = -1UL;
   9.355 +    struct p2m_domain *p2m = d->arch.p2m;
   9.356 +
   9.357 +    p2m_lock(p2m);
   9.358 +
   9.359 +    if ( pagetable_get_pfn(d->arch.phys_table) != 0 )
   9.360 +    {
   9.361 +        P2M_ERROR("p2m already allocated for this domain\n");
   9.362 +        p2m_unlock(p2m);
   9.363 +        return -EINVAL;
   9.364 +    }
   9.365 +
   9.366 +    P2M_PRINTK("allocating p2m table\n");
   9.367 +
   9.368 +    p2m->alloc_page = alloc_page;
   9.369 +    p2m->free_page = free_page;
   9.370 +
   9.371 +    p2m_top = p2m->alloc_page(d);
   9.372 +    if ( p2m_top == NULL )
   9.373 +    {
   9.374 +        p2m_unlock(p2m);
   9.375 +        return -ENOMEM;
   9.376 +    }
   9.377 +    list_add_tail(&p2m_top->list, &p2m->pages);
   9.378 +
   9.379 +    p2m_top->count_info = 1;
   9.380 +    p2m_top->u.inuse.type_info =
   9.381 +#if CONFIG_PAGING_LEVELS == 4
   9.382 +        PGT_l4_page_table
   9.383 +#elif CONFIG_PAGING_LEVELS == 3
   9.384 +        PGT_l3_page_table
   9.385 +#elif CONFIG_PAGING_LEVELS == 2
   9.386 +        PGT_l2_page_table
   9.387 +#endif
   9.388 +        | 1 | PGT_validated;
   9.389 +
   9.390 +    d->arch.phys_table = pagetable_from_mfn(page_to_mfn(p2m_top));
   9.391 +
   9.392 +    P2M_PRINTK("populating p2m table\n");
   9.393 +
   9.394 +    /* Initialise physmap tables for slot zero. Other code assumes this. */
   9.395 +    if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), p2m_invalid) )
   9.396 +        goto error;
   9.397 +
   9.398 +    /* Copy all existing mappings from the page list and m2p */
   9.399 +    for ( entry = d->page_list.next;
   9.400 +          entry != &d->page_list;
   9.401 +          entry = entry->next )
   9.402 +    {
   9.403 +        page = list_entry(entry, struct page_info, list);
   9.404 +        mfn = page_to_mfn(page);
   9.405 +        gfn = get_gpfn_from_mfn(mfn_x(mfn));
   9.406 +        page_count++;
   9.407 +        if (
   9.408 +#ifdef __x86_64__
   9.409 +            (gfn != 0x5555555555555555L)
   9.410 +#else
   9.411 +            (gfn != 0x55555555L)
   9.412 +#endif
   9.413 +             && gfn != INVALID_M2P_ENTRY
   9.414 +            && !set_p2m_entry(d, gfn, mfn, p2m_ram_rw) )
   9.415 +            goto error;
   9.416 +    }
   9.417 +
   9.418 +#if CONFIG_PAGING_LEVELS >= 3
   9.419 +    if (vtd_enabled && is_hvm_domain(d))
   9.420 +        iommu_set_pgd(d);
   9.421 +#endif
   9.422 +
   9.423 +    P2M_PRINTK("p2m table initialised (%u pages)\n", page_count);
   9.424 +    p2m_unlock(p2m);
   9.425 +    return 0;
   9.426 +
   9.427 + error:
   9.428 +    P2M_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%"
   9.429 +               PRI_mfn "\n", gfn, mfn_x(mfn));
   9.430 +    p2m_unlock(p2m);
   9.431 +    return -ENOMEM;
   9.432 +}
   9.433 +
   9.434 +void p2m_teardown(struct domain *d)
   9.435 +/* Return all the p2m pages to Xen.
   9.436 + * We know we don't have any extra mappings to these pages */
   9.437 +{
   9.438 +    struct list_head *entry, *n;
   9.439 +    struct page_info *pg;
   9.440 +    struct p2m_domain *p2m = d->arch.p2m;
   9.441 +
   9.442 +    p2m_lock(p2m);
   9.443 +    d->arch.phys_table = pagetable_null();
   9.444 +
   9.445 +    list_for_each_safe(entry, n, &p2m->pages)
   9.446 +    {
   9.447 +        pg = list_entry(entry, struct page_info, list);
   9.448 +        list_del(entry);
   9.449 +        p2m->free_page(d, pg);
   9.450 +    }
   9.451 +    p2m_unlock(p2m);
   9.452 +}
   9.453 +
   9.454 +void p2m_final_teardown(struct domain *d)
   9.455 +{
   9.456 +    xfree(d->arch.p2m);
   9.457 +    d->arch.p2m = NULL;
   9.458 +}
   9.459 +
   9.460  #if P2M_AUDIT
   9.461  static void audit_p2m(struct domain *d)
   9.462  {
   9.463 @@ -564,7 +632,7 @@ static void audit_p2m(struct domain *d)
   9.464              set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
   9.465          }
   9.466  
   9.467 -        if ( test_linear && (gfn <= d->arch.p2m.max_mapped_pfn) )
   9.468 +        if ( test_linear && (gfn <= d->arch.p2m->max_mapped_pfn) )
   9.469          {
   9.470              lp2mfn = mfn_x(gfn_to_mfn_current(gfn, &type));
   9.471              if ( lp2mfn != mfn_x(p2mfn) )
   9.472 @@ -695,11 +763,11 @@ void
   9.473  guest_physmap_remove_page(struct domain *d, unsigned long gfn,
   9.474                            unsigned long mfn)
   9.475  {
   9.476 -    p2m_lock(d);
   9.477 +    p2m_lock(d->arch.p2m);
   9.478      audit_p2m(d);
   9.479      p2m_remove_page(d, gfn, mfn);
   9.480      audit_p2m(d);
   9.481 -    p2m_unlock(d);
   9.482 +    p2m_unlock(d->arch.p2m);
   9.483  }
   9.484  
   9.485  int
   9.486 @@ -722,7 +790,7 @@ guest_physmap_add_entry(struct domain *d
   9.487       */
   9.488      if ( paging_mode_hap(d) && (gfn > 0xfffffUL) )
   9.489      {
   9.490 -        if ( !test_and_set_bool(d->arch.hvm_domain.amd_npt_4gb_warning) )
   9.491 +        if ( !test_and_set_bool(d->arch.hvm_domain.svm.npt_4gb_warning) )
   9.492              dprintk(XENLOG_WARNING, "Dom%d failed to populate memory beyond"
   9.493                      " 4GB: specify 'hap=0' domain config option.\n",
   9.494                      d->domain_id);
   9.495 @@ -730,7 +798,7 @@ guest_physmap_add_entry(struct domain *d
   9.496      }
   9.497  #endif
   9.498  
   9.499 -    p2m_lock(d);
   9.500 +    p2m_lock(d->arch.p2m);
   9.501      audit_p2m(d);
   9.502  
   9.503      P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
   9.504 @@ -781,7 +849,7 @@ guest_physmap_add_entry(struct domain *d
   9.505      }
   9.506  
   9.507      audit_p2m(d);
   9.508 -    p2m_unlock(d);
   9.509 +    p2m_unlock(d->arch.p2m);
   9.510  
   9.511      return rc;
   9.512  }
   9.513 @@ -812,7 +880,7 @@ void p2m_change_type_global(struct domai
   9.514      if ( pagetable_get_pfn(d->arch.phys_table) == 0 )
   9.515          return;
   9.516  
   9.517 -    p2m_lock(d);
   9.518 +    p2m_lock(d->arch.p2m);
   9.519  
   9.520  #if CONFIG_PAGING_LEVELS == 4
   9.521      l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
   9.522 @@ -884,7 +952,7 @@ void p2m_change_type_global(struct domai
   9.523      unmap_domain_page(l2e);
   9.524  #endif
   9.525  
   9.526 -    p2m_unlock(d);
   9.527 +    p2m_unlock(d->arch.p2m);
   9.528  }
   9.529  
   9.530  /* Modify the p2m type of a single gfn from ot to nt, returning the 
   9.531 @@ -895,13 +963,13 @@ p2m_type_t p2m_change_type(struct domain
   9.532      p2m_type_t pt;
   9.533      mfn_t mfn;
   9.534  
   9.535 -    p2m_lock(d);
   9.536 +    p2m_lock(d->arch.p2m);
   9.537  
   9.538      mfn = gfn_to_mfn(d, gfn, &pt);
   9.539      if ( pt == ot )
   9.540          set_p2m_entry(d, gfn, mfn, nt);
   9.541  
   9.542 -    p2m_unlock(d);
   9.543 +    p2m_unlock(d->arch.p2m);
   9.544  
   9.545      return pt;
   9.546  }
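
[Editor's note] With the p2m state now reached through d->arch.p2m, callers hand the lock routines the p2m object itself rather than the domain. The wrappers below are only an illustrative sketch of what p2m_lock()/p2m_unlock() plausibly look like after this change (the real macros live in p2m.c and are not part of these hunks); the fields they touch are the ones declared in struct p2m_domain in p2m.h further down.

    /* Illustrative sketch only -- not the changeset's actual macros. */
    #define p2m_lock(p)                                   \
        do {                                              \
            spin_lock(&(p)->lock);                        \
            (p)->locker = current->processor;             \
            (p)->locker_function = __func__;              \
        } while (0)

    #define p2m_unlock(p)                                 \
        do {                                              \
            (p)->locker = -1;                             \
            (p)->locker_function = "nobody";              \
            spin_unlock(&(p)->lock);                      \
        } while (0)
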
    10.1 --- a/xen/arch/x86/mm/paging.c	Tue Apr 08 11:41:27 2008 +0100
    10.2 +++ b/xen/arch/x86/mm/paging.c	Wed Apr 09 11:30:32 2008 +0100
    10.3 @@ -484,9 +484,12 @@ void paging_log_dirty_teardown(struct do
    10.4  /*           CODE FOR PAGING SUPPORT            */
    10.5  /************************************************/
    10.6  /* Domain paging struct initialization. */
    10.7 -void paging_domain_init(struct domain *d)
    10.8 +int paging_domain_init(struct domain *d)
    10.9  {
   10.10 -    p2m_init(d);
   10.11 +    int rc;
   10.12 +
   10.13 +    if ( (rc = p2m_init(d)) != 0 )
   10.14 +        return rc;
   10.15  
   10.16      /* The order of the *_init calls below is important, as the later
   10.17       * ones may rewrite some common fields.  Shadow pagetables are the
   10.18 @@ -496,6 +499,8 @@ void paging_domain_init(struct domain *d
   10.19      /* ... but we will use hardware assistance if it's available. */
   10.20      if ( hap_enabled(d) )
   10.21          hap_domain_init(d);
   10.22 +
   10.23 +    return 0;
   10.24  }
   10.25  
   10.26  /* vcpu paging struct initialization goes here */
   10.27 @@ -589,6 +594,8 @@ void paging_final_teardown(struct domain
   10.28          hap_final_teardown(d);
   10.29      else
   10.30          shadow_final_teardown(d);
   10.31 +
   10.32 +    p2m_final_teardown(d);
   10.33  }
   10.34  
   10.35  /* Enable an arbitrary paging-assistance mode.  Call once at domain
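
[Editor's note] paging_domain_init() can now fail, because p2m_init() has to allocate the p2m structure, so its caller must propagate the error instead of ignoring it. A minimal sketch of the caller's side, using a hypothetical wrapper name since arch_domain_create() itself is not shown in this section:

    /* Hypothetical wrapper illustrating the new contract. */
    static int sketch_init_domain_paging(struct domain *d)
    {
        int rc = paging_domain_init(d);
        if ( rc != 0 )
            return rc;   /* typically -ENOMEM from p2m_init() */
        return 0;
    }
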
    11.1 --- a/xen/common/domctl.c	Tue Apr 08 11:41:27 2008 +0100
    11.2 +++ b/xen/common/domctl.c	Wed Apr 09 11:30:32 2008 +0100
    11.3 @@ -25,6 +25,8 @@
    11.4  #include <public/domctl.h>
    11.5  #include <xsm/xsm.h>
    11.6  
    11.7 +DEFINE_SPINLOCK(domctl_lock);
    11.8 +
    11.9  extern long arch_do_domctl(
   11.10      struct xen_domctl *op, XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
   11.11  
   11.12 @@ -180,7 +182,6 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
   11.13  {
   11.14      long ret = 0;
   11.15      struct xen_domctl curop, *op = &curop;
   11.16 -    static DEFINE_SPINLOCK(domctl_lock);
   11.17  
   11.18      if ( !IS_PRIV(current->domain) )
   11.19          return -EPERM;
    12.1 --- a/xen/drivers/passthrough/vtd/iommu.c	Tue Apr 08 11:41:27 2008 +0100
    12.2 +++ b/xen/drivers/passthrough/vtd/iommu.c	Wed Apr 09 11:30:32 2008 +0100
    12.3 @@ -23,6 +23,7 @@
    12.4  #include <xen/sched.h>
    12.5  #include <xen/xmalloc.h>
    12.6  #include <xen/domain_page.h>
    12.7 +#include <asm/paging.h>
    12.8  #include <xen/iommu.h>
    12.9  #include <xen/numa.h>
   12.10  #include "iommu.h"
   12.11 @@ -2057,9 +2058,42 @@ void iommu_set_pgd(struct domain *d)
   12.12      }
   12.13      p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table));
   12.14  
   12.15 +    if ( paging_mode_hap(d) )
   12.16 +    {
   12.17 +        int level = agaw_to_level(hd->agaw);
   12.18 +        struct dma_pte *dpte = NULL;
   12.19 +        mfn_t pgd_mfn;
   12.20 +
   12.21 +        switch ( level )
   12.22 +        {
   12.23 +        case VTD_PAGE_TABLE_LEVEL_3:
   12.24 +            dpte = map_domain_page(p2m_table);
   12.25 +            if ( !dma_pte_present(*dpte) )
   12.26 +            {
   12.27 +                gdprintk(XENLOG_ERR VTDPREFIX,
   12.28 +                         "iommu_set_pgd: second level wasn't there\n");
   12.29 +                unmap_domain_page(dpte);
   12.30 +                return;
   12.31 +            }
   12.32 +            pgd_mfn = _mfn(dma_pte_addr(*dpte) >> PAGE_SHIFT_4K);
   12.33 +            unmap_domain_page(dpte);
   12.34 +            hd->pgd = maddr_to_virt(pagetable_get_paddr(
   12.35 +                pagetable_from_mfn(pgd_mfn)));
   12.36 +            break;
   12.37 +        case VTD_PAGE_TABLE_LEVEL_4:
   12.38 +            pgd_mfn = _mfn(p2m_table);
   12.39 +            hd->pgd = maddr_to_virt(pagetable_get_paddr(
   12.40 +                pagetable_from_mfn(pgd_mfn)));
   12.41 +            break;
   12.42 +        default:
   12.43 +            gdprintk(XENLOG_ERR VTDPREFIX,
   12.44 +                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
   12.45 +            break;
   12.46 +        }
   12.47 +    }
   12.48 +    else
   12.49 +    {
   12.50  #if CONFIG_PAGING_LEVELS == 3
   12.51 -    if ( !hd->pgd )
   12.52 -    {
   12.53          int level = agaw_to_level(hd->agaw);
   12.54          struct dma_pte *pmd = NULL;
   12.55          struct dma_pte *pgd = NULL;
   12.56 @@ -2125,10 +2159,7 @@ void iommu_set_pgd(struct domain *d)
   12.57          }
   12.58          unmap_domain_page(l3e);
   12.59          spin_unlock_irqrestore(&hd->mapping_lock, flags);
   12.60 -    }
   12.61  #elif CONFIG_PAGING_LEVELS == 4
   12.62 -    if ( !hd->pgd )
   12.63 -    {
   12.64          int level = agaw_to_level(hd->agaw);
   12.65          l3_pgentry_t *l3e;
   12.66          mfn_t pgd_mfn;
   12.67 @@ -2160,8 +2191,8 @@ void iommu_set_pgd(struct domain *d)
   12.68                       "iommu_set_pgd:Unsupported p2m table sharing level!\n");
   12.69              break;
   12.70          }
   12.71 +#endif
   12.72      }
   12.73 -#endif
   12.74      gdprintk(XENLOG_INFO VTDPREFIX,
   12.75               "iommu_set_pgd: hd->pgd = %p\n", hd->pgd);
   12.76  }
    13.1 --- a/xen/include/asm-x86/domain.h	Tue Apr 08 11:41:27 2008 +0100
    13.2 +++ b/xen/include/asm-x86/domain.h	Wed Apr 09 11:30:32 2008 +0100
    13.3 @@ -138,27 +138,6 @@ struct hap_domain {
    13.4  };
    13.5  
    13.6  /************************************************/
    13.7 -/*       p2m handling                           */
    13.8 -/************************************************/
    13.9 -struct p2m_domain {
   13.10 -    /* Lock that protects updates to the p2m */
   13.11 -    spinlock_t         lock;
   13.12 -    int                locker;   /* processor which holds the lock */
   13.13 -    const char        *locker_function; /* Func that took it */
   13.14 -
   13.15 -    /* Pages used to construct the p2m */
   13.16 -    struct list_head   pages;
   13.17 -
   13.18 -    /* Functions to call to get or free pages for the p2m */
   13.19 -    struct page_info * (*alloc_page  )(struct domain *d);
   13.20 -    void               (*free_page   )(struct domain *d,
   13.21 -                                       struct page_info *pg);
   13.22 -
   13.23 -    /* Highest guest frame that's ever been mapped in the p2m */
   13.24 -    unsigned long max_mapped_pfn;
   13.25 -};
   13.26 -
   13.27 -/************************************************/
   13.28  /*       common paging data structure           */
   13.29  /************************************************/
   13.30  struct log_dirty_domain {
   13.31 @@ -208,6 +187,8 @@ struct paging_vcpu {
   13.32      struct shadow_vcpu shadow;
   13.33  };
   13.34  
   13.35 +struct p2m_domain;
   13.36 +
   13.37  struct arch_domain
   13.38  {
   13.39      l1_pgentry_t *mm_perdomain_pt;
   13.40 @@ -232,7 +213,7 @@ struct arch_domain
   13.41      struct hvm_domain hvm_domain;
   13.42  
   13.43      struct paging_domain paging;
   13.44 -    struct p2m_domain p2m ;
   13.45 +    struct p2m_domain *p2m;
   13.46  
   13.47      /* Shadow translated domain: P2M mapping */
   13.48      pagetable_t phys_table;
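
[Editor's note] struct p2m_domain moves out of arch_domain and is now reached through a pointer, with only a forward declaration left in domain.h. A hedged sketch of what p2m_init() plausibly does under the new layout; the actual body lives in p2m.c (not shown here) and presumably also installs the default table walkers and, for HAP domains, the EPT-specific hooks.

    /* Sketch only -- simplified relative to the real p2m_init(). */
    int p2m_init(struct domain *d)
    {
        struct p2m_domain *p2m = xmalloc(struct p2m_domain);

        if ( p2m == NULL )
            return -ENOMEM;

        memset(p2m, 0, sizeof(*p2m));
        spin_lock_init(&p2m->lock);
        INIT_LIST_HEAD(&p2m->pages);
        d->arch.p2m = p2m;

        return 0;
    }
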
    14.1 --- a/xen/include/asm-x86/hvm/domain.h	Tue Apr 08 11:41:27 2008 +0100
    14.2 +++ b/xen/include/asm-x86/hvm/domain.h	Wed Apr 09 11:30:32 2008 +0100
    14.3 @@ -28,6 +28,8 @@
    14.4  #include <asm/hvm/vioapic.h>
    14.5  #include <asm/hvm/io.h>
    14.6  #include <xen/hvm/iommu.h>
    14.7 +#include <asm/hvm/vmx/vmcs.h>
    14.8 +#include <asm/hvm/svm/vmcb.h>
    14.9  #include <public/hvm/params.h>
   14.10  #include <public/hvm/save.h>
   14.11  
   14.12 @@ -60,8 +62,6 @@ struct hvm_domain {
   14.13  
   14.14      uint64_t               params[HVM_NR_PARAMS];
   14.15  
   14.16 -    unsigned long          vmx_apic_access_mfn;
   14.17 -
   14.18      /* Memory ranges with pinned cache attributes. */
   14.19      struct list_head       pinned_cacheattr_ranges;
   14.20  
   14.21 @@ -74,11 +74,13 @@ struct hvm_domain {
   14.22      /* Pass-through */
   14.23      struct hvm_iommu       hvm_iommu;
   14.24  
   14.25 -#if CONFIG_PAGING_LEVELS == 3
   14.26 -    bool_t                 amd_npt_4gb_warning;
   14.27 -#endif
   14.28      bool_t                 hap_enabled;
   14.29      bool_t                 qemu_mapcache_invalidate;
   14.30 +
   14.31 +    union {
   14.32 +        struct vmx_domain vmx;
   14.33 +        struct svm_domain svm;
   14.34 +    };
   14.35  };
   14.36  
   14.37  #endif /* __ASM_X86_HVM_DOMAIN_H__ */
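
[Editor's note] The VMX-only apic_access_mfn field is replaced by a per-vendor union: VMX state now lives under hvm_domain.vmx and SVM state (e.g. the relocated npt_4gb_warning flag used earlier) under hvm_domain.svm. A trivial usage sketch:

    /* Illustrative accessor; not part of the changeset. */
    static unsigned long vmx_apic_access_mfn(struct domain *d)
    {
        return d->arch.hvm_domain.vmx.apic_access_mfn;
    }
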
    15.1 --- a/xen/include/asm-x86/hvm/svm/vmcb.h	Tue Apr 08 11:41:27 2008 +0100
    15.2 +++ b/xen/include/asm-x86/hvm/svm/vmcb.h	Wed Apr 09 11:30:32 2008 +0100
    15.3 @@ -444,6 +444,12 @@ struct vmcb_struct {
    15.4      u64 res16[301];
    15.5  } __attribute__ ((packed));
    15.6  
    15.7 +struct svm_domain {
    15.8 +#if CONFIG_PAGING_LEVELS == 3
    15.9 +    bool_t npt_4gb_warning;
   15.10 +#endif
   15.11 +};
   15.12 +
   15.13  struct arch_svm_struct {
   15.14      struct vmcb_struct *vmcb;
   15.15      u64    vmcb_pa;
    16.1 --- a/xen/include/asm-x86/hvm/vmx/vmcs.h	Tue Apr 08 11:41:27 2008 +0100
    16.2 +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h	Wed Apr 09 11:30:32 2008 +0100
    16.3 @@ -53,6 +53,23 @@ struct vmx_msr_state {
    16.4      unsigned long msrs[VMX_MSR_COUNT];
    16.5  };
    16.6  
    16.7 +#define EPT_DEFAULT_MT      6
    16.8 +#define EPT_DEFAULT_GAW     3
    16.9 +
   16.10 +struct vmx_domain {
   16.11 +    unsigned long apic_access_mfn;
   16.12 +
   16.13 +    union {
   16.14 +        struct {
   16.15 +            u64 etmt :3,
   16.16 +                gaw  :3,
   16.17 +                rsvd :6,
   16.18 +                asr  :52;
   16.19 +        };
   16.20 +        u64 eptp;
   16.21 +    } ept_control;
   16.22 +};
   16.23 +
   16.24  struct arch_vmx_struct {
   16.25      /* Virtual address of VMCS. */
   16.26      struct vmcs_struct  *vmcs;
   16.27 @@ -71,6 +88,7 @@ struct arch_vmx_struct {
   16.28  
   16.29      /* Cache of cpu execution control. */
   16.30      u32                  exec_control;
   16.31 +    u32                  secondary_exec_control;
   16.32  
   16.33      /* PMU */
   16.34      struct vpmu_struct   vpmu;
   16.35 @@ -108,6 +126,8 @@ void vmx_vmcs_exit(struct vcpu *v);
   16.36  #define CPU_BASED_MWAIT_EXITING               0x00000400
   16.37  #define CPU_BASED_RDPMC_EXITING               0x00000800
   16.38  #define CPU_BASED_RDTSC_EXITING               0x00001000
   16.39 +#define CPU_BASED_CR3_LOAD_EXITING            0x00008000
   16.40 +#define CPU_BASED_CR3_STORE_EXITING           0x00010000
   16.41  #define CPU_BASED_CR8_LOAD_EXITING            0x00080000
   16.42  #define CPU_BASED_CR8_STORE_EXITING           0x00100000
   16.43  #define CPU_BASED_TPR_SHADOW                  0x00200000
   16.44 @@ -136,6 +156,7 @@ extern u32 vmx_vmexit_control;
   16.45  extern u32 vmx_vmentry_control;
   16.46  
   16.47  #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
   16.48 +#define SECONDARY_EXEC_ENABLE_EPT               0x00000002
   16.49  #define SECONDARY_EXEC_WBINVD_EXITING           0x00000040
   16.50  extern u32 vmx_secondary_exec_control;
   16.51  
   16.52 @@ -151,6 +172,10 @@ extern bool_t cpu_has_vmx_ins_outs_instr
   16.53      (vmx_pin_based_exec_control & PIN_BASED_VIRTUAL_NMIS)
   16.54  #define cpu_has_vmx_msr_bitmap \
   16.55      (vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_MSR_BITMAP)
   16.56 +#define cpu_has_vmx_secondary_exec_control \
   16.57 +    (vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
   16.58 +#define cpu_has_vmx_ept \
   16.59 +    (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)
   16.60  
   16.61  /* GUEST_INTERRUPTIBILITY_INFO flags. */
   16.62  #define VMX_INTR_SHADOW_STI             0x00000001
   16.63 @@ -192,11 +217,23 @@ enum vmcs_field {
   16.64      VIRTUAL_APIC_PAGE_ADDR          = 0x00002012,
   16.65      VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
   16.66      APIC_ACCESS_ADDR                = 0x00002014,
   16.67 -    APIC_ACCESS_ADDR_HIGH           = 0x00002015, 
   16.68 +    APIC_ACCESS_ADDR_HIGH           = 0x00002015,
   16.69 +    EPT_POINTER                     = 0x0000201a,
   16.70 +    EPT_POINTER_HIGH                = 0x0000201b,
   16.71 +    GUEST_PHYSICAL_ADDRESS          = 0x00002400,
   16.72 +    GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
   16.73      VMCS_LINK_POINTER               = 0x00002800,
   16.74      VMCS_LINK_POINTER_HIGH          = 0x00002801,
   16.75      GUEST_IA32_DEBUGCTL             = 0x00002802,
   16.76      GUEST_IA32_DEBUGCTL_HIGH        = 0x00002803,
   16.77 +    GUEST_PDPTR0                    = 0x0000280a,
   16.78 +    GUEST_PDPTR0_HIGH               = 0x0000280b,
   16.79 +    GUEST_PDPTR1                    = 0x0000280c,
   16.80 +    GUEST_PDPTR1_HIGH               = 0x0000280d,
   16.81 +    GUEST_PDPTR2                    = 0x0000280e,
   16.82 +    GUEST_PDPTR2_HIGH               = 0x0000280f,
   16.83 +    GUEST_PDPTR3                    = 0x00002810,
   16.84 +    GUEST_PDPTR3_HIGH               = 0x00002811,
   16.85      PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
   16.86      CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,
   16.87      EXCEPTION_BITMAP                = 0x00004004,
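
[Editor's note] Putting the new vmcs.h pieces together: the ept_control bitfield encodes the EPT pointer (memory type, guest address width, and the address-space root, i.e. the PFN of the p2m root), and the result is loaded into the new EPT_POINTER VMCS field. The sketch below shows one plausible shape of that step; the changeset's real code (presumably in vmcs.c/p2m-ept.c) is not shown in this section and may differ in detail.

    /* Hedged sketch: assumes the vCPU's VMCS is already loaded
     * (vmx_vmcs_enter()/vmx_vmcs_exit() elided). */
    static void sketch_load_eptp(struct domain *d)
    {
        struct vmx_domain *vd = &d->arch.hvm_domain.vmx;

        vd->ept_control.etmt = EPT_DEFAULT_MT;   /* write-back */
        vd->ept_control.gaw  = EPT_DEFAULT_GAW;  /* 4-level walk */
        vd->ept_control.asr  = pagetable_get_pfn(d->arch.phys_table);

        __vmwrite(EPT_POINTER, vd->ept_control.eptp);
    #ifdef __i386__
        __vmwrite(EPT_POINTER_HIGH, vd->ept_control.eptp >> 32);
    #endif
    }
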
    17.1 --- a/xen/include/asm-x86/hvm/vmx/vmx.h	Tue Apr 08 11:41:27 2008 +0100
    17.2 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h	Wed Apr 09 11:30:32 2008 +0100
    17.3 @@ -23,9 +23,27 @@
    17.4  #include <asm/types.h>
    17.5  #include <asm/regs.h>
    17.6  #include <asm/processor.h>
    17.7 +#include <asm/i387.h>
    17.8 +#include <asm/hvm/support.h>
    17.9 +#include <asm/hvm/trace.h>
   17.10  #include <asm/hvm/vmx/vmcs.h>
   17.11 -#include <asm/i387.h>
   17.12 -#include <asm/hvm/trace.h>
   17.13 +
   17.14 +typedef union {
   17.15 +    struct {
   17.16 +        u64 r       :   1,
   17.17 +        w           :   1,
   17.18 +        x           :   1,
   17.19 +        emt         :   4,
   17.20 +        sp_avail    :   1,
   17.21 +        avail1      :   4,
   17.22 +        mfn         :   45,
   17.23 +        rsvd        :   5,
   17.24 +        avail2      :   2;
   17.25 +    };
   17.26 +    u64 epte;
   17.27 +} ept_entry_t;
   17.28 +
   17.29 +#define EPT_TABLE_ORDER     9
   17.30  
   17.31  void vmx_asm_vmexit_handler(struct cpu_user_regs);
   17.32  void vmx_asm_do_vmentry(void);
   17.33 @@ -80,6 +98,8 @@ void vmx_realmode(struct cpu_user_regs *
   17.34  #define EXIT_REASON_MACHINE_CHECK       41
   17.35  #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
   17.36  #define EXIT_REASON_APIC_ACCESS         44
   17.37 +#define EXIT_REASON_EPT_VIOLATION       48
   17.38 +#define EXIT_REASON_EPT_MISCONFIG       49
   17.39  #define EXIT_REASON_WBINVD              54
   17.40  
   17.41  /*
   17.42 @@ -143,12 +163,14 @@ void vmx_realmode(struct cpu_user_regs *
   17.43  #define VMREAD_OPCODE   ".byte 0x0f,0x78\n"
   17.44  #define VMRESUME_OPCODE ".byte 0x0f,0x01,0xc3\n"
   17.45  #define VMWRITE_OPCODE  ".byte 0x0f,0x79\n"
   17.46 +#define INVEPT_OPCODE   ".byte 0x66,0x0f,0x38,0x80\n"   /* m128,r64/32 */
   17.47  #define VMXOFF_OPCODE   ".byte 0x0f,0x01,0xc4\n"
   17.48  #define VMXON_OPCODE    ".byte 0xf3,0x0f,0xc7\n"
   17.49  
   17.50 +#define MODRM_EAX_08    ".byte 0x08\n" /* ECX, [EAX] */
   17.51  #define MODRM_EAX_06    ".byte 0x30\n" /* [EAX], with reg/opcode: /6 */
   17.52  #define MODRM_EAX_07    ".byte 0x38\n" /* [EAX], with reg/opcode: /7 */
   17.53 -#define MODRM_EAX_ECX   ".byte 0xc1\n" /* [EAX], [ECX] */
   17.54 +#define MODRM_EAX_ECX   ".byte 0xc1\n" /* EAX, ECX */
   17.55  
   17.56  static inline void __vmptrld(u64 addr)
   17.57  {
   17.58 @@ -232,6 +254,31 @@ static inline void __vm_clear_bit(unsign
   17.59      __vmwrite(field, __vmread(field) & ~(1UL << bit));
   17.60  }
   17.61  
   17.62 +static inline void __invept(int ext, u64 eptp, u64 gpa)
   17.63 +{
   17.64 +    struct {
   17.65 +        u64 eptp, gpa;
   17.66 +    } operand = {eptp, gpa};
   17.67 +
   17.68 +    __asm__ __volatile__ ( INVEPT_OPCODE
   17.69 +                           MODRM_EAX_08
   17.70 +                           /* CF==1 or ZF==1 --> rc = -1 */
   17.71 +                           "ja 1f ; ud2 ; 1:\n"
   17.72 +                           :
   17.73 +                           : "a" (&operand), "c" (ext)
   17.74 +                           : "memory");
   17.75 +}
   17.76 +
   17.77 +static inline void ept_sync_all(void)
   17.78 +{
   17.79 +    if ( !current->domain->arch.hvm_domain.hap_enabled )
   17.80 +        return;
   17.81 +
   17.82 +    __invept(2, 0, 0);
   17.83 +}
   17.84 +
   17.85 +void ept_sync_domain(struct domain *d);
   17.86 +
   17.87  static inline void __vmxoff(void)
   17.88  {
   17.89      asm volatile (
   17.90 @@ -265,4 +312,6 @@ void vmx_inject_hw_exception(struct vcpu
   17.91  void vmx_inject_extint(struct vcpu *v, int trap);
   17.92  void vmx_inject_nmi(struct vcpu *v);
   17.93  
   17.94 +void ept_p2m_init(struct domain *d);
   17.95 +
   17.96  #endif /* __ASM_X86_HVM_VMX_VMX_H__ */
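
[Editor's note] ept_entry_t gives the new p2m-ept.c walker a typed view of a 64-bit EPT entry. Below is a hedged sketch of filling a leaf entry with it, assuming p2m.h is in scope for p2m_type_t/p2m_is_ram(); the real walker also handles intermediate levels and the full set of p2m types.

    /* Sketch only -- not the changeset's actual entry writer. */
    static void sketch_set_ept_leaf(ept_entry_t *e, mfn_t mfn, p2m_type_t t)
    {
        e->epte = 0;
        e->emt  = EPT_DEFAULT_MT;          /* memory type for this mapping */
        e->mfn  = mfn_x(mfn);              /* machine frame backing the gfn */
        /* RAM gets full access; non-RAM types would restrict w/x. */
        e->r = e->w = e->x = p2m_is_ram(t) ? 1 : 0;
    }
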
    18.1 --- a/xen/include/asm-x86/p2m.h	Tue Apr 08 11:41:27 2008 +0100
    18.2 +++ b/xen/include/asm-x86/p2m.h	Wed Apr 09 11:30:32 2008 +0100
    18.3 @@ -26,6 +26,8 @@
    18.4  #ifndef _XEN_P2M_H
    18.5  #define _XEN_P2M_H
    18.6  
    18.7 +#include <xen/config.h>
    18.8 +#include <xen/paging.h>
    18.9  
   18.10  /*
   18.11   * The phys_to_machine_mapping maps guest physical frame numbers 
   18.12 @@ -86,54 +88,49 @@ typedef enum {
   18.13  #define p2m_is_readonly(_t) (p2m_to_mask(_t) & P2M_RO_TYPES)
   18.14  #define p2m_is_valid(_t) (p2m_to_mask(_t) & (P2M_RAM_TYPES | P2M_MMIO_TYPES))
   18.15  
   18.16 +struct p2m_domain {
   18.17 +    /* Lock that protects updates to the p2m */
   18.18 +    spinlock_t         lock;
   18.19 +    int                locker;   /* processor which holds the lock */
   18.20 +    const char        *locker_function; /* Func that took it */
   18.21 +
   18.22 +    /* Pages used to construct the p2m */
   18.23 +    struct list_head   pages;
   18.24 +
   18.25 +    /* Functions to call to get or free pages for the p2m */
   18.26 +    struct page_info * (*alloc_page  )(struct domain *d);
   18.27 +    void               (*free_page   )(struct domain *d,
   18.28 +                                       struct page_info *pg);
   18.29 +    int                (*set_entry   )(struct domain *d, unsigned long gfn,
   18.30 +                                       mfn_t mfn, p2m_type_t p2mt);
   18.31 +    mfn_t              (*get_entry   )(struct domain *d, unsigned long gfn,
   18.32 +                                       p2m_type_t *p2mt);
   18.33 +    mfn_t              (*get_entry_current)(unsigned long gfn,
   18.34 +                                            p2m_type_t *p2mt);
   18.35 +
   18.36 +    /* Highest guest frame that's ever been mapped in the p2m */
   18.37 +    unsigned long max_mapped_pfn;
   18.38 +};
   18.39 +
   18.40  /* Extract the type from the PTE flags that store it */
   18.41  static inline p2m_type_t p2m_flags_to_type(unsigned long flags)
   18.42  {
   18.43      /* Type is stored in the "available" bits, 9, 10 and 11 */
   18.44      return (flags >> 9) & 0x7;
   18.45  }
   18.46 - 
   18.47 -/* Read the current domain's p2m table (through the linear mapping). */
   18.48 +
   18.49 +/* Read the current domain's p2m table. */
   18.50  static inline mfn_t gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t)
   18.51  {
   18.52 -    mfn_t mfn = _mfn(INVALID_MFN);
   18.53 -    p2m_type_t p2mt = p2m_mmio_dm;
   18.54 -    /* XXX This is for compatibility with the old model, where anything not 
   18.55 -     * XXX marked as RAM was considered to be emulated MMIO space.
   18.56 -     * XXX Once we start explicitly registering MMIO regions in the p2m 
   18.57 -     * XXX we will return p2m_invalid for unmapped gfns */
   18.58 -
   18.59 -    if ( gfn <= current->domain->arch.p2m.max_mapped_pfn )
   18.60 -    {
   18.61 -        l1_pgentry_t l1e = l1e_empty();
   18.62 -        int ret;
   18.63 -
   18.64 -        ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) 
   18.65 -               / sizeof(l1_pgentry_t));
   18.66 -
   18.67 -        /* Need to __copy_from_user because the p2m is sparse and this
   18.68 -         * part might not exist */
   18.69 -        ret = __copy_from_user(&l1e,
   18.70 -                               &phys_to_machine_mapping[gfn],
   18.71 -                               sizeof(l1e));
   18.72 -
   18.73 -        if ( ret == 0 ) {
   18.74 -            p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
   18.75 -            ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
   18.76 -            if ( p2m_is_valid(p2mt) )
   18.77 -                mfn = _mfn(l1e_get_pfn(l1e));
   18.78 -            else 
   18.79 -                /* XXX see above */
   18.80 -                p2mt = p2m_mmio_dm;
   18.81 -        }
   18.82 -    }
   18.83 -
   18.84 -    *t = p2mt;
   18.85 -    return mfn;
   18.86 +    return current->domain->arch.p2m->get_entry_current(gfn, t);
   18.87  }
   18.88  
   18.89  /* Read another domain's P2M table, mapping pages as we go */
   18.90 -mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t);
   18.91 +static inline
   18.92 +mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t)
   18.93 +{
   18.94 +    return d->arch.p2m->get_entry(d, gfn, t);
   18.95 +}
   18.96  
   18.97  /* General conversion function from gfn to mfn */
   18.98  #define gfn_to_mfn(d, g, t) _gfn_to_mfn((d), (g), (t))
   18.99 @@ -149,7 +146,7 @@ static inline mfn_t _gfn_to_mfn(struct d
  18.100      }
  18.101      if ( likely(current->domain == d) )
  18.102          return gfn_to_mfn_current(gfn, t);
  18.103 -    else 
  18.104 +    else
  18.105          return gfn_to_mfn_foreign(d, gfn, t);
  18.106  }
  18.107  
  18.108 @@ -185,7 +182,7 @@ gl1e_to_ml1e(struct domain *d, l1_pgentr
  18.109  
  18.110  
  18.111  /* Init the datastructures for later use by the p2m code */
  18.112 -void p2m_init(struct domain *d);
  18.113 +int p2m_init(struct domain *d);
  18.114  
  18.115  /* Allocate a new p2m table for a domain. 
  18.116   *
  18.117 @@ -199,6 +196,7 @@ int p2m_alloc_table(struct domain *d,
  18.118  
  18.119  /* Return all the p2m resources to Xen. */
  18.120  void p2m_teardown(struct domain *d);
  18.121 +void p2m_final_teardown(struct domain *d);
  18.122  
  18.123  /* Add a page to a domain's p2m table */
  18.124  int guest_physmap_add_entry(struct domain *d, unsigned long gfn,
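
[Editor's note] The three new function pointers (set_entry, get_entry, get_entry_current) are how the EPT implementation plugs in without the linear p2m mapping: gfn_to_mfn_current() and gfn_to_mfn_foreign() above simply dispatch through them. One plausible shape for ept_p2m_init() (declared in vmx.h), with illustrative handler names, is sketched below; the actual body lives in the new p2m-ept.c and is not shown in this section.

    /* Sketch only; ept_set_entry/ept_get_entry/ept_get_entry_current are
     * illustrative names for the EPT-backed handlers. */
    void ept_p2m_init(struct domain *d)
    {
        struct p2m_domain *p2m = d->arch.p2m;

        p2m->set_entry         = ept_set_entry;
        p2m->get_entry         = ept_get_entry;
        p2m->get_entry_current = ept_get_entry_current;
    }
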
    19.1 --- a/xen/include/asm-x86/paging.h	Tue Apr 08 11:41:27 2008 +0100
    19.2 +++ b/xen/include/asm-x86/paging.h	Wed Apr 09 11:30:32 2008 +0100
    19.3 @@ -183,7 +183,7 @@ void paging_vcpu_init(struct vcpu *v);
    19.4  
    19.5  /* Set up the paging-assistance-specific parts of a domain struct at
    19.6   * start of day.  Called for every domain from arch_domain_create() */
    19.7 -void paging_domain_init(struct domain *d);
    19.8 +int paging_domain_init(struct domain *d);
    19.9  
   19.10  /* Handler for paging-control ops: operations from user-space to enable
   19.11   * and disable ephemeral shadow modes (test mode and log-dirty mode) and
    20.1 --- a/xen/include/public/hvm/params.h	Tue Apr 08 11:41:27 2008 +0100
    20.2 +++ b/xen/include/public/hvm/params.h	Wed Apr 09 11:30:32 2008 +0100
    20.3 @@ -83,7 +83,8 @@
    20.4  
    20.5  /* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */
    20.6  #define HVM_PARAM_HPET_ENABLED 11
    20.7 +#define HVM_PARAM_IDENT_PT     12
    20.8  
    20.9 -#define HVM_NR_PARAMS          12
   20.10 +#define HVM_NR_PARAMS          13
   20.11  
   20.12  #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
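
[Editor's note] HVM_PARAM_IDENT_PT is the channel by which the toolstack tells Xen the guest-physical address of an identity-mapped page table, presumably so a vCPU that disables its own paging can keep running with paging (and hence EPT) enabled underneath. A hedged sketch of the tools side, assuming the usual xc_set_hvm_param() helper:

    /* Sketch only; ident_pt_paddr is assumed to be the guest-physical
     * address of the identity page table. */
    #include <xenctrl.h>

    static int sketch_set_ident_pt(int xc_handle, uint32_t domid,
                                   unsigned long ident_pt_paddr)
    {
        return xc_set_hvm_param(xc_handle, domid, HVM_PARAM_IDENT_PT,
                                ident_pt_paddr);
    }
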
    21.1 --- a/xen/include/xen/hypercall.h	Tue Apr 08 11:41:27 2008 +0100
    21.2 +++ b/xen/include/xen/hypercall.h	Wed Apr 09 11:30:32 2008 +0100
    21.3 @@ -30,6 +30,7 @@ do_sched_op(
    21.4      int cmd,
    21.5      XEN_GUEST_HANDLE(void) arg);
    21.6  
    21.7 +extern spinlock_t domctl_lock;
    21.8  extern long
    21.9  do_domctl(
   21.10      XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
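
[Editor's note] Hoisting domctl_lock to file scope in domctl.c and declaring it here lets code outside do_domctl() serialise against domain-control operations. A minimal usage sketch with a hypothetical caller; the real consumer added by this changeset is elsewhere in the hypervisor and not shown in this section.

    #include <xen/hypercall.h>
    #include <xen/spinlock.h>

    static long sketch_op_serialised_against_domctl(void)
    {
        long rc;

        spin_lock(&domctl_lock);
        rc = 0;   /* ... work that must not race a concurrent domctl ... */
        spin_unlock(&domctl_lock);

        return rc;
    }
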