ia64/xen-unstable

changeset 15812:86a154e1ef5d

[HVM] Shadow: don't shadow the p2m table.
For HVM vcpus with paging disabled, we used to shadow the p2m table,
and skip the p2m lookup to go from gfn to mfn. Instead, we now
provide a simple pagetable that gives a one-to-one mapping of 4GB, and
shadow that, making the translations from gfn to mfn via the p2m.
This removes the paging-disabled special-case code from the shadow
fault handler, and allows us to expand the p2m interface, since all HVM
translations now go through the same p2m lookups.
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
author Tim Deegan <Tim.Deegan@xensource.com>
date Fri Aug 31 11:06:22 2007 +0100 (2007-08-31)
parents 9fd5becfba6b
children 577313e3c0a6
files xen/arch/x86/domain.c xen/arch/x86/hvm/hvm.c xen/arch/x86/mm/hap/hap.c xen/arch/x86/mm/paging.c xen/arch/x86/mm/shadow/common.c xen/arch/x86/mm/shadow/multi.c xen/arch/x86/mm/shadow/private.h xen/arch/x86/mm/shadow/types.h xen/include/asm-x86/domain.h xen/include/asm-x86/p2m.h xen/include/asm-x86/paging.h
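To make the description above concrete before reading the hunks: the replacement for the shadowed p2m is a single 32-bit, non-PAE l2 page whose 1024 entries are 4MB superpage mappings covering 4GB one-to-one. The sketch below restates, as a self-contained C fragment, the fill loop this patch adds to shadow_enable() (see the shadow/common.c hunk). The function name fill_unpaged_l2() and the locally re-declared constants are illustrative only; the in-tree code uses Xen's own definitions and allocates the page from the shadow pool via shadow_alloc_p2m_page().

    #include <stdint.h>
    #include <stddef.h>

    #define PAGE_SIZE       4096U
    #define _PAGE_PRESENT   0x001U   /* standard x86 l2e flag bits */
    #define _PAGE_RW        0x002U
    #define _PAGE_USER      0x004U
    #define _PAGE_ACCESSED  0x020U
    #define _PAGE_DIRTY     0x040U
    #define _PAGE_PSE       0x080U   /* 4MB superpage */

    /* Fill one mapped l2 page with 1024 superpage entries, so that virtual
     * address N*4MB maps to physical address N*4MB for N = 0..1023 (4GB). */
    static void fill_unpaged_l2(uint32_t *e)
    {
        size_t i;
        for ( i = 0; i < PAGE_SIZE / sizeof(*e); i++ )
            e[i] = (uint32_t)(0x400000UL * i)
                   | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER
                   | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE;
    }

The patch then marks the page as a validated l2 pagetable (PGT_l2_page_table | 1 | PGT_validated) and records it in d->arch.paging.shadow.unpaged_pagetable, so that sh_update_paging_modes() can install it as guest_table whenever CR0.PG is clear and shadow it like any other guest l2.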
line diff
     1.1 --- a/xen/arch/x86/domain.c	Fri Aug 31 10:59:41 2007 +0100
     1.2 +++ b/xen/arch/x86/domain.c	Fri Aug 31 11:06:22 2007 +0100
     1.3 @@ -1723,13 +1723,13 @@ void domain_relinquish_resources(struct 
     1.4  
     1.5      BUG_ON(!cpus_empty(d->domain_dirty_cpumask));
     1.6  
     1.7 +    /* Tear down paging-assistance stuff. */
     1.8 +    paging_teardown(d);
     1.9 +
    1.10      /* Drop the in-use references to page-table bases. */
    1.11      for_each_vcpu ( d, v )
    1.12          vcpu_destroy_pagetables(v);
    1.13  
    1.14 -    /* Tear down paging-assistance stuff. */
    1.15 -    paging_teardown(d);
    1.16 -
    1.17      /*
    1.18       * Relinquish GDT mappings. No need for explicit unmapping of the LDT as
    1.19       * it automatically gets squashed when the guest's mappings go away.
     2.1 --- a/xen/arch/x86/hvm/hvm.c	Fri Aug 31 10:59:41 2007 +0100
     2.2 +++ b/xen/arch/x86/hvm/hvm.c	Fri Aug 31 11:06:22 2007 +0100
     2.3 @@ -517,7 +517,7 @@ void hvm_triple_fault(void)
     2.4  int hvm_set_cr0(unsigned long value)
     2.5  {
     2.6      struct vcpu *v = current;
     2.7 -    unsigned long mfn, old_base_mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
     2.8 +    unsigned long mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
     2.9    
    2.10      HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
    2.11  
    2.12 @@ -569,10 +569,7 @@ int hvm_set_cr0(unsigned long value)
    2.13              }
    2.14  
    2.15              /* Now arch.guest_table points to machine physical. */
    2.16 -            old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
    2.17              v->arch.guest_table = pagetable_from_pfn(mfn);
    2.18 -            if ( old_base_mfn )
    2.19 -                put_page(mfn_to_page(old_base_mfn));
    2.20  
    2.21              HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
    2.22                          v->arch.hvm_vcpu.guest_cr[3], mfn);
     3.1 --- a/xen/arch/x86/mm/hap/hap.c	Fri Aug 31 10:59:41 2007 +0100
     3.2 +++ b/xen/arch/x86/mm/hap/hap.c	Fri Aug 31 11:06:22 2007 +0100
     3.3 @@ -621,8 +621,6 @@ static void hap_update_paging_modes(stru
     3.4          hvm_pae_enabled(v)       ? &hap_paging_pae_mode  :
     3.5                                     &hap_paging_protected_mode;
     3.6  
     3.7 -    v->arch.paging.translate_enabled = hvm_paging_enabled(v);
     3.8 -
     3.9      if ( pagetable_is_null(v->arch.monitor_table) )
    3.10      {
    3.11          mfn_t mmfn = hap_make_monitor_table(v);
     4.1 --- a/xen/arch/x86/mm/paging.c	Fri Aug 31 10:59:41 2007 +0100
     4.2 +++ b/xen/arch/x86/mm/paging.c	Fri Aug 31 11:06:22 2007 +0100
     4.3 @@ -496,10 +496,9 @@ void paging_dump_vcpu_info(struct vcpu *
     4.4          if ( paging_mode_shadow(v->domain) )
     4.5          {
     4.6              if ( v->arch.paging.mode )
     4.7 -                printk("shadowed %u-on-%u, %stranslated\n",
     4.8 +                printk("shadowed %u-on-%u\n",
     4.9                         v->arch.paging.mode->guest_levels,
    4.10 -                       v->arch.paging.mode->shadow.shadow_levels,
    4.11 -                       paging_vcpu_mode_translate(v) ? "" : "not ");
    4.12 +                       v->arch.paging.mode->shadow.shadow_levels);
    4.13              else
    4.14                  printk("not shadowed\n");
    4.15          }
     5.1 --- a/xen/arch/x86/mm/shadow/common.c	Fri Aug 31 10:59:41 2007 +0100
     5.2 +++ b/xen/arch/x86/mm/shadow/common.c	Fri Aug 31 11:06:22 2007 +0100
     5.3 @@ -2207,7 +2207,6 @@ static void sh_update_paging_modes(struc
     5.4  {
     5.5      struct domain *d = v->domain;
     5.6      struct paging_mode *old_mode = v->arch.paging.mode;
     5.7 -    mfn_t old_guest_table;
     5.8  
     5.9      ASSERT(shadow_locked_by_me(d));
    5.10  
    5.11 @@ -2256,7 +2255,6 @@ static void sh_update_paging_modes(struc
    5.12  #else
    5.13  #error unexpected paging mode
    5.14  #endif
    5.15 -        v->arch.paging.translate_enabled = !!shadow_mode_translate(d);
    5.16      }
    5.17      else
    5.18      {
    5.19 @@ -2266,37 +2264,17 @@ static void sh_update_paging_modes(struc
    5.20          ASSERT(shadow_mode_translate(d));
    5.21          ASSERT(shadow_mode_external(d));
    5.22  
    5.23 -        v->arch.paging.translate_enabled = hvm_paging_enabled(v);
    5.24 -        if ( !v->arch.paging.translate_enabled )
    5.25 +        if ( !hvm_paging_enabled(v) )
    5.26          {
    5.27 -            /* Set v->arch.guest_table to use the p2m map, and choose
    5.28 -             * the appropriate shadow mode */
    5.29 -            old_guest_table = pagetable_get_mfn(v->arch.guest_table);
    5.30 -#if CONFIG_PAGING_LEVELS == 2
    5.31 -            v->arch.guest_table =
    5.32 -                pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table));
    5.33 -            v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2);
    5.34 -#elif CONFIG_PAGING_LEVELS == 3 
    5.35 -            v->arch.guest_table =
    5.36 -                pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table));
    5.37 -            v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
    5.38 -#else /* CONFIG_PAGING_LEVELS == 4 */
    5.39 -            { 
    5.40 -                l4_pgentry_t *l4e; 
    5.41 -                /* Use the start of the first l3 table as a PAE l3 */
    5.42 -                ASSERT(pagetable_get_pfn(d->arch.phys_table) != 0);
    5.43 -                l4e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
    5.44 -                ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT);
    5.45 -                v->arch.guest_table =
    5.46 -                    pagetable_from_pfn(l4e_get_pfn(l4e[0]));
    5.47 -                sh_unmap_domain_page(l4e);
    5.48 -            }
    5.49 -            v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
    5.50 +            /* When the guest has CR0.PG clear, we provide a 32-bit, non-PAE
    5.51 +             * pagetable for it, mapping 4 GB one-to-one using a single l2
    5.52 +             * page of 1024 superpage mappings */
    5.53 +            v->arch.guest_table = d->arch.paging.shadow.unpaged_pagetable;
    5.54 +#if CONFIG_PAGING_LEVELS >= 3
    5.55 +            v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode, 3, 2);
    5.56 +#else
    5.57 +            v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode, 2, 2);
    5.58  #endif
    5.59 -            /* Fix up refcounts on guest_table */
    5.60 -            get_page(mfn_to_page(pagetable_get_mfn(v->arch.guest_table)), d);
    5.61 -            if ( mfn_x(old_guest_table) != 0 )
    5.62 -                put_page(mfn_to_page(old_guest_table));
    5.63          }
    5.64          else
    5.65          {
    5.66 @@ -2428,7 +2406,9 @@ int shadow_enable(struct domain *d, u32 
    5.67   * Returns 0 for success, -errno for failure. */
    5.68  {    
    5.69      unsigned int old_pages;
    5.70 -    int rv = 0;
    5.71 +    struct page_info *pg = NULL;
    5.72 +    uint32_t *e;
    5.73 +    int i, rv = 0;
    5.74  
    5.75      mode |= PG_SH_enable;
    5.76  
    5.77 @@ -2469,6 +2449,28 @@ int shadow_enable(struct domain *d, u32 
    5.78              goto out_unlocked;
    5.79      }
    5.80  
    5.81 +    /* HVM domains need an extra pagetable for vcpus that think they
    5.82 +     * have paging disabled */
    5.83 +    if ( is_hvm_domain(d) )
    5.84 +    {
    5.85 +        /* Get a single page from the shadow pool.  Take it via the 
    5.86 +         * P2M interface to make freeing it simpler afterwards. */
    5.87 +        pg = shadow_alloc_p2m_page(d);
    5.88 +        if ( pg == NULL )
    5.89 +        {
    5.90 +            rv = -ENOMEM;
    5.91 +            goto out_unlocked;
    5.92 +        }
    5.93 +        /* Fill it with 32-bit, non-PAE superpage entries, each mapping 4MB
    5.94 +         * of virtual address space onto the same physical address range */ 
    5.95 +        e = sh_map_domain_page(page_to_mfn(pg));
    5.96 +        for ( i = 0; i < PAGE_SIZE / sizeof(*e); i++ )
    5.97 +            e[i] = ((0x400000U * i)
    5.98 +                    | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER 
    5.99 +                    | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
   5.100 +        sh_unmap_domain_page(e);
   5.101 +        pg->u.inuse.type_info = PGT_l2_page_table | 1 | PGT_validated;
   5.102 +    }
   5.103  
   5.104      shadow_lock(d);
   5.105  
   5.106 @@ -2492,6 +2494,10 @@ int shadow_enable(struct domain *d, u32 
   5.107      d->arch.paging.shadow.opt_flags = SHOPT_LINUX_L3_TOPLEVEL;
   5.108  #endif
   5.109  
   5.110 +    /* Record the 1-to-1 pagetable we just made */
   5.111 +    if ( is_hvm_domain(d) )
   5.112 +        d->arch.paging.shadow.unpaged_pagetable = pagetable_from_page(pg);
   5.113 +
   5.114      /* Update the bits */
   5.115      sh_new_mode(d, mode);
   5.116  
   5.117 @@ -2500,6 +2506,8 @@ int shadow_enable(struct domain *d, u32 
   5.118   out_unlocked:
   5.119      if ( rv != 0 && !pagetable_is_null(d->arch.phys_table) )
   5.120          p2m_teardown(d);
   5.121 +    if ( rv != 0 && pg != NULL )
   5.122 +        shadow_free_p2m_page(d, pg);
   5.123      domain_unpause(d);
   5.124      return rv;
   5.125  }
   5.126 @@ -2579,6 +2587,21 @@ void shadow_teardown(struct domain *d)
   5.127          ASSERT(d->arch.paging.shadow.total_pages == 0);
   5.128      }
   5.129  
   5.130 +    /* Free the non-paged-vcpus pagetable; must happen after we've 
   5.131 +     * destroyed any shadows of it or sh_destroy_shadow will get confused. */
   5.132 +    if ( !pagetable_is_null(d->arch.paging.shadow.unpaged_pagetable) )
   5.133 +    {
   5.134 +        for_each_vcpu(d, v)
   5.135 +        {
   5.136 +            ASSERT(is_hvm_vcpu(v));
   5.137 +            if ( !hvm_paging_enabled(v) )
   5.138 +                v->arch.guest_table = pagetable_null();
   5.139 +        }
   5.140 +        shadow_free_p2m_page(d, 
   5.141 +            pagetable_get_page(d->arch.paging.shadow.unpaged_pagetable));
   5.142 +        d->arch.paging.shadow.unpaged_pagetable = pagetable_null();
   5.143 +    }
   5.144 +
   5.145      /* We leave the "permanent" shadow modes enabled, but clear the
   5.146       * log-dirty mode bit.  We don't want any more mark_dirty()
   5.147       * calls now that we've torn down the bitmap */
   5.148 @@ -2756,10 +2779,6 @@ shadow_write_p2m_entry(struct vcpu *v, u
   5.149      /* update the entry with new content */
   5.150      safe_write_pte(p, new);
   5.151  
   5.152 -    /* The P2M can be shadowed: keep the shadows synced */
   5.153 -    if ( d->vcpu[0] != NULL )
   5.154 -        (void)sh_validate_guest_entry(d->vcpu[0], table_mfn, p, sizeof(*p));
   5.155 -
   5.156      /* install P2M in monitors for PAE Xen */
   5.157  #if CONFIG_PAGING_LEVELS == 3
   5.158      if ( level == 3 ) {
     6.1 --- a/xen/arch/x86/mm/shadow/multi.c	Fri Aug 31 10:59:41 2007 +0100
     6.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Fri Aug 31 11:06:22 2007 +0100
     6.3 @@ -173,9 +173,12 @@ static inline int
     6.4  guest_supports_superpages(struct vcpu *v)
     6.5  {
     6.6      /* The _PAGE_PSE bit must be honoured in HVM guests, whenever
     6.7 -     * CR4.PSE is set or the guest is in PAE or long mode */
     6.8 -    return (is_hvm_vcpu(v) && (GUEST_PAGING_LEVELS != 2 
     6.9 -                             || (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PSE)));
    6.10 +     * CR4.PSE is set or the guest is in PAE or long mode. 
     6.11 +     * It's also used in the dummy PT for vcpus with CR0.PG cleared. */
    6.12 +    return (is_hvm_vcpu(v) && 
    6.13 +            (GUEST_PAGING_LEVELS != 2 
    6.14 +             || !hvm_paging_enabled(v)
    6.15 +             || (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PSE)));
    6.16  }
    6.17  
    6.18  static inline int
    6.19 @@ -205,8 +208,9 @@ guest_supports_nx(struct vcpu *v)
    6.20  static inline int 
    6.21  guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op)
    6.22  {
    6.23 -    ASSERT(!guest_op || shadow_locked_by_me(v->domain));
    6.24 -
    6.25 +    struct domain *d = v->domain;
    6.26 +    ASSERT(!guest_op || shadow_locked_by_me(d));
    6.27 +    
    6.28      perfc_incr(shadow_guest_walk);
    6.29      memset(gw, 0, sizeof(*gw));
    6.30      gw->va = va;
    6.31 @@ -219,11 +223,11 @@ guest_walk_tables(struct vcpu *v, unsign
    6.32          + guest_l4_table_offset(va);
    6.33      /* Walk down to the l3e */
    6.34      if ( !(guest_l4e_get_flags(*gw->l4e) & _PAGE_PRESENT) ) return 0;
    6.35 -    gw->l3mfn = vcpu_gfn_to_mfn(v, guest_l4e_get_gfn(*gw->l4e));
    6.36 +    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e));
    6.37      if ( !mfn_valid(gw->l3mfn) ) return 1;
    6.38      /* This mfn is a pagetable: make sure the guest can't write to it. */
    6.39      if ( guest_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
    6.40 -        flush_tlb_mask(v->domain->domain_dirty_cpumask); 
    6.41 +        flush_tlb_mask(d->domain_dirty_cpumask); 
    6.42      gw->l3e = ((guest_l3e_t *)sh_map_domain_page(gw->l3mfn))
    6.43          + guest_l3_table_offset(va);
    6.44  #else /* PAE only... */
    6.45 @@ -232,11 +236,11 @@ guest_walk_tables(struct vcpu *v, unsign
    6.46  #endif /* PAE or 64... */
    6.47      /* Walk down to the l2e */
    6.48      if ( !(guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT) ) return 0;
    6.49 -    gw->l2mfn = vcpu_gfn_to_mfn(v, guest_l3e_get_gfn(*gw->l3e));
    6.50 +    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e));
    6.51      if ( !mfn_valid(gw->l2mfn) ) return 1;
    6.52      /* This mfn is a pagetable: make sure the guest can't write to it. */
    6.53      if ( guest_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
    6.54 -        flush_tlb_mask(v->domain->domain_dirty_cpumask); 
    6.55 +        flush_tlb_mask(d->domain_dirty_cpumask); 
    6.56      gw->l2e = ((guest_l2e_t *)sh_map_domain_page(gw->l2mfn))
    6.57          + guest_l2_table_offset(va);
    6.58  #else /* 32-bit only... */
    6.59 @@ -274,12 +278,12 @@ guest_walk_tables(struct vcpu *v, unsign
    6.60      else 
    6.61      {
    6.62          /* Not a superpage: carry on and find the l1e. */
    6.63 -        gw->l1mfn = vcpu_gfn_to_mfn(v, guest_l2e_get_gfn(*gw->l2e));
    6.64 +        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e));
    6.65          if ( !mfn_valid(gw->l1mfn) ) return 1;
    6.66          /* This mfn is a pagetable: make sure the guest can't write to it. */
    6.67          if ( guest_op 
    6.68               && sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
    6.69 -            flush_tlb_mask(v->domain->domain_dirty_cpumask); 
    6.70 +            flush_tlb_mask(d->domain_dirty_cpumask); 
    6.71          gw->l1e = ((guest_l1e_t *)sh_map_domain_page(gw->l1mfn))
    6.72              + guest_l1_table_offset(va);
    6.73          gw->eff_l1e = *gw->l1e;
    6.74 @@ -2191,6 +2195,7 @@ static int validate_gl4e(struct vcpu *v,
    6.75      guest_l4e_t *new_gl4e = new_ge;
    6.76      shadow_l4e_t *sl4p = se;
    6.77      mfn_t sl3mfn = _mfn(INVALID_MFN);
    6.78 +    struct domain *d = v->domain;
    6.79      int result = 0;
    6.80  
    6.81      perfc_incr(shadow_validate_gl4e_calls);
    6.82 @@ -2198,7 +2203,7 @@ static int validate_gl4e(struct vcpu *v,
    6.83      if ( guest_l4e_get_flags(*new_gl4e) & _PAGE_PRESENT )
    6.84      {
    6.85          gfn_t gl3gfn = guest_l4e_get_gfn(*new_gl4e);
    6.86 -        mfn_t gl3mfn = vcpu_gfn_to_mfn(v, gl3gfn);
    6.87 +        mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn);
    6.88          if ( mfn_valid(gl3mfn) )
    6.89              sl3mfn = get_shadow_status(v, gl3mfn, SH_type_l3_shadow);
    6.90          else
    6.91 @@ -2208,11 +2213,11 @@ static int validate_gl4e(struct vcpu *v,
    6.92                               sl3mfn, &new_sl4e, ft_prefetch);
    6.93  
    6.94      // check for updates to xen reserved slots
    6.95 -    if ( !shadow_mode_external(v->domain) )
    6.96 +    if ( !shadow_mode_external(d) )
    6.97      {
    6.98          int shadow_index = (((unsigned long)sl4p & ~PAGE_MASK) /
    6.99                              sizeof(shadow_l4e_t));
   6.100 -        int reserved_xen_slot = !is_guest_l4_slot(v->domain, shadow_index);
   6.101 +        int reserved_xen_slot = !is_guest_l4_slot(d, shadow_index);
   6.102  
   6.103          if ( unlikely(reserved_xen_slot) )
   6.104          {
   6.105 @@ -2250,7 +2255,7 @@ static int validate_gl3e(struct vcpu *v,
   6.106      if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
   6.107      {
   6.108          gfn_t gl2gfn = guest_l3e_get_gfn(*new_gl3e);
   6.109 -        mfn_t gl2mfn = vcpu_gfn_to_mfn(v, gl2gfn);
   6.110 +        mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn);
   6.111          if ( mfn_valid(gl2mfn) )
   6.112              sl2mfn = get_shadow_status(v, gl2mfn, SH_type_l2_shadow);
   6.113          else
   6.114 @@ -2294,7 +2299,7 @@ static int validate_gl2e(struct vcpu *v,
   6.115          }
   6.116          else
   6.117          {
   6.118 -            mfn_t gl1mfn = vcpu_gfn_to_mfn(v, gl1gfn);
   6.119 +            mfn_t gl1mfn = gfn_to_mfn(v->domain, gl1gfn);
   6.120              if ( mfn_valid(gl1mfn) )
   6.121                  sl1mfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow);
   6.122              else
   6.123 @@ -2361,10 +2366,9 @@ static int validate_gl1e(struct vcpu *v,
   6.124      perfc_incr(shadow_validate_gl1e_calls);
   6.125  
   6.126      gfn = guest_l1e_get_gfn(*new_gl1e);
   6.127 -    gmfn = vcpu_gfn_to_mfn(v, gfn);
   6.128 -
   6.129 -    mmio = (is_hvm_vcpu(v) && paging_vcpu_mode_translate(v) && 
   6.130 -            mmio_space(gfn_to_paddr(gfn)));
   6.131 +    gmfn = gfn_to_mfn(v->domain, gfn);
   6.132 +
   6.133 +    mmio = (is_hvm_vcpu(v) && mmio_space(gfn_to_paddr(gfn)));
   6.134      l1e_propagate_from_guest(v, new_gl1e, _mfn(INVALID_MFN), gmfn, &new_sl1e, 
   6.135                               ft_prefetch, mmio);
   6.136      
   6.137 @@ -2593,10 +2597,8 @@ static void sh_prefetch(struct vcpu *v, 
   6.138  
   6.139          /* Look at the gfn that the l1e is pointing at */
   6.140          gfn = guest_l1e_get_gfn(gl1e);
   6.141 -        gmfn = vcpu_gfn_to_mfn(v, gfn);
   6.142 -        mmio = ( is_hvm_vcpu(v) 
   6.143 -                 && paging_vcpu_mode_translate(v) 
   6.144 -                 && mmio_space(gfn_to_paddr(gfn)) );
   6.145 +        gmfn = gfn_to_mfn(v->domain, gfn);
   6.146 +        mmio = ( is_hvm_vcpu(v) && mmio_space(gfn_to_paddr(gfn)) );
   6.147  
   6.148          /* Propagate the entry.  Safe to use a pointer to our local 
   6.149           * gl1e, since this is not a demand-fetch so there will be no 
   6.150 @@ -2657,23 +2659,14 @@ static int sh_page_fault(struct vcpu *v,
   6.151          {
   6.152              if ( sh_l1e_is_gnp(sl1e) )
   6.153              {
   6.154 -                if ( likely(!is_hvm_domain(d) ||
   6.155 -                            paging_vcpu_mode_translate(v)) )
   6.156 -                { 
   6.157 -                    /* Not-present in a guest PT: pass to the guest as
   6.158 -                     * a not-present fault (by flipping two bits). */
   6.159 -                    ASSERT(regs->error_code & PFEC_page_present);
   6.160 -                    regs->error_code ^= (PFEC_reserved_bit|PFEC_page_present);
   6.161 -                    reset_early_unshadow(v);
   6.162 -                    perfc_incr(shadow_fault_fast_gnp);
   6.163 -                    SHADOW_PRINTK("fast path not-present\n");
   6.164 -                    return 0;
   6.165 -                }
   6.166 -                else 
   6.167 -                {
   6.168 -                    /* Not-present in the P2M: MMIO */
   6.169 -                    gpa = va;
   6.170 -                }
   6.171 +                /* Not-present in a guest PT: pass to the guest as
   6.172 +                 * a not-present fault (by flipping two bits). */
   6.173 +                ASSERT(regs->error_code & PFEC_page_present);
   6.174 +                regs->error_code ^= (PFEC_reserved_bit|PFEC_page_present);
   6.175 +                reset_early_unshadow(v);
   6.176 +                perfc_incr(shadow_fault_fast_gnp);
   6.177 +                SHADOW_PRINTK("fast path not-present\n");
   6.178 +                return 0;
   6.179              }
   6.180              else
   6.181              {
   6.182 @@ -2745,13 +2738,6 @@ static int sh_page_fault(struct vcpu *v,
   6.183      //
   6.184      if ( unlikely(!(guest_l1e_get_flags(gw.eff_l1e) & _PAGE_PRESENT)) )
   6.185      {
   6.186 -        if ( is_hvm_domain(d) && !paging_vcpu_mode_translate(v) )
   6.187 -        {
   6.188 -            /* Not present in p2m map, means this is mmio */
   6.189 -            gpa = va;
   6.190 -            goto mmio;
   6.191 -        }
   6.192 -
   6.193          perfc_incr(shadow_fault_bail_not_present);
   6.194          goto not_a_shadow_fault;
   6.195      }
   6.196 @@ -2801,10 +2787,8 @@ static int sh_page_fault(struct vcpu *v,
   6.197  
   6.198      /* What mfn is the guest trying to access? */
   6.199      gfn = guest_l1e_get_gfn(gw.eff_l1e);
   6.200 -    gmfn = vcpu_gfn_to_mfn(v, gfn);
   6.201 -    mmio = (is_hvm_domain(d)
   6.202 -            && paging_vcpu_mode_translate(v) 
   6.203 -            && mmio_space(gfn_to_paddr(gfn)));
   6.204 +    gmfn = gfn_to_mfn(d, gfn);
   6.205 +    mmio = (is_hvm_domain(d) && mmio_space(gfn_to_paddr(gfn)));
   6.206  
   6.207      if ( !mmio && !mfn_valid(gmfn) )
   6.208      {
   6.209 @@ -3523,20 +3507,18 @@ sh_update_cr3(struct vcpu *v, int do_loc
   6.210          ASSERT(shadow_mode_external(d));
   6.211  
   6.212          // Is paging enabled on this vcpu?
   6.213 -        if ( paging_vcpu_mode_translate(v) )
   6.214 +        if ( hvm_paging_enabled(v) )
   6.215          {
   6.216              gfn = _gfn(paddr_to_pfn(v->arch.hvm_vcpu.guest_cr[3]));
   6.217 -            gmfn = vcpu_gfn_to_mfn(v, gfn);
   6.218 +            gmfn = gfn_to_mfn(d, gfn);
   6.219              ASSERT(mfn_valid(gmfn));
   6.220              ASSERT(pagetable_get_pfn(v->arch.guest_table) == mfn_x(gmfn));
   6.221          } 
   6.222          else 
   6.223          {
   6.224 -            /* Paging disabled: guest_table points at (part of) p2m */
   6.225 -#if SHADOW_PAGING_LEVELS != 3 /* in 3-on-4, guest-table is in slot 0 of p2m */
   6.226 -            /* For everything else, they sould be the same */
   6.227 -            ASSERT(v->arch.guest_table.pfn == d->arch.phys_table.pfn);
   6.228 -#endif
   6.229 +            /* Paging disabled: guest_table points at a 32-bit 1-to-1 map */
   6.230 +            ASSERT(v->arch.guest_table.pfn
   6.231 +                   == d->arch.paging.shadow.unpaged_pagetable.pfn);
   6.232          }
   6.233      }
   6.234  #endif
   6.235 @@ -3574,11 +3556,11 @@ sh_update_cr3(struct vcpu *v, int do_loc
   6.236        * until the next CR3 write makes us refresh our cache. */
   6.237       ASSERT(v->arch.paging.shadow.guest_vtable == NULL);
   6.238   
   6.239 -     if ( shadow_mode_external(d) && paging_vcpu_mode_translate(v) ) 
   6.240 -         /* Paging enabled: find where in the page the l3 table is */
   6.241 +     if ( shadow_mode_external(d) ) 
   6.242 +         /* Find where in the page the l3 table is */
   6.243           guest_idx = guest_index((void *)v->arch.hvm_vcpu.guest_cr[3]);
   6.244       else
   6.245 -         /* Paging disabled or PV: l3 is at the start of a page */ 
   6.246 +         /* PV guest: l3 is at the start of a page */ 
   6.247           guest_idx = 0; 
   6.248  
   6.249       // Ignore the low 2 bits of guest_idx -- they are really just
   6.250 @@ -3635,7 +3617,7 @@ sh_update_cr3(struct vcpu *v, int do_loc
   6.251              if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
   6.252              {
   6.253                  gl2gfn = guest_l3e_get_gfn(gl3e[i]);
   6.254 -                gl2mfn = vcpu_gfn_to_mfn(v, gl2gfn);
   6.255 +                gl2mfn = gfn_to_mfn(d, gl2gfn);
   6.256                  flush |= sh_remove_write_access(v, gl2mfn, 2, 0); 
   6.257              }
   6.258          }
   6.259 @@ -3647,7 +3629,7 @@ sh_update_cr3(struct vcpu *v, int do_loc
   6.260              if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
   6.261              {
   6.262                  gl2gfn = guest_l3e_get_gfn(gl3e[i]);
   6.263 -                gl2mfn = vcpu_gfn_to_mfn(v, gl2gfn);
   6.264 +                gl2mfn = gfn_to_mfn(d, gl2gfn);
   6.265                  sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3) 
   6.266                                         ? SH_type_l2h_shadow 
   6.267                                         : SH_type_l2_shadow);
   6.268 @@ -4001,7 +3983,7 @@ static inline void * emulate_map_dest(st
   6.269          }
   6.270      }
   6.271  #endif
   6.272 -    mfn = vcpu_gfn_to_mfn(v, gfn);
   6.273 +    mfn = gfn_to_mfn(v->domain, gfn);
   6.274  
   6.275      errcode = PFEC_write_access;
   6.276      if ( !(flags & _PAGE_PRESENT) ) 
   6.277 @@ -4268,7 +4250,7 @@ audit_gfn_to_mfn(struct vcpu *v, gfn_t g
   6.278           != PGT_writable_page ) 
   6.279          return _mfn(gfn_x(gfn)); /* This is a paging-disabled shadow */
   6.280      else 
   6.281 -        return gfn_to_mfn(v->domain, gfn_x(gfn));
   6.282 +        return gfn_to_mfn(v->domain, gfn);
   6.283  } 
   6.284  
   6.285  
     7.1 --- a/xen/arch/x86/mm/shadow/private.h	Fri Aug 31 10:59:41 2007 +0100
     7.2 +++ b/xen/arch/x86/mm/shadow/private.h	Fri Aug 31 11:06:22 2007 +0100
     7.3 @@ -431,6 +431,13 @@ int shadow_cmpxchg_guest_entry(struct vc
     7.4  #undef mfn_valid
     7.5  #define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
     7.6  
     7.7 +/* Override pagetable_t <-> struct page_info conversions to work with mfn_t */
     7.8 +#undef pagetable_get_page
     7.9 +#define pagetable_get_page(x)   mfn_to_page(pagetable_get_mfn(x))
    7.10 +#undef pagetable_from_page
    7.11 +#define pagetable_from_page(pg) pagetable_from_mfn(page_to_mfn(pg))
    7.12 +
    7.13 +
    7.14  #if GUEST_PAGING_LEVELS >= 3
    7.15  # define is_lo_pte(_vaddr) (((_vaddr)&0x4)==0)
    7.16  #else
     8.1 --- a/xen/arch/x86/mm/shadow/types.h	Fri Aug 31 10:59:41 2007 +0100
     8.2 +++ b/xen/arch/x86/mm/shadow/types.h	Fri Aug 31 11:06:22 2007 +0100
     8.3 @@ -406,28 +406,17 @@ valid_gfn(gfn_t m)
     8.4      return VALID_GFN(gfn_x(m));
     8.5  }
     8.6  
     8.7 -/* Translation between mfns and gfns */
     8.8 -
     8.9 -// vcpu-specific version of gfn_to_mfn().  This is where we hide the dirty
    8.10 -// little secret that, for hvm guests with paging disabled, nearly all of the
    8.11 -// shadow code actually think that the guest is running on *untranslated* page
    8.12 -// tables (which is actually domain->phys_table).
    8.13 -//
    8.14 -
    8.15 -static inline mfn_t
    8.16 -vcpu_gfn_to_mfn(struct vcpu *v, gfn_t gfn)
    8.17 -{
    8.18 -    if ( !paging_vcpu_mode_translate(v) )
    8.19 -        return _mfn(gfn_x(gfn));
    8.20 -    return gfn_to_mfn(v->domain, gfn_x(gfn));
    8.21 -}
    8.22 -
    8.23  static inline paddr_t
    8.24  gfn_to_paddr(gfn_t gfn)
    8.25  {
    8.26      return ((paddr_t)gfn_x(gfn)) << PAGE_SHIFT;
    8.27  }
    8.28  
    8.29 +/* Override gfn_to_mfn to work with gfn_t */
    8.30 +#undef gfn_to_mfn
    8.31 +#define gfn_to_mfn(d, g) _gfn_to_mfn((d), gfn_x(g))
    8.32 +
    8.33 +
    8.34  /* Type used for recording a walk through guest pagetables.  It is
    8.35   * filled in by the pagetable walk function, and also used as a cache
    8.36   * for later walks.  
     9.1 --- a/xen/include/asm-x86/domain.h	Fri Aug 31 10:59:41 2007 +0100
     9.2 +++ b/xen/include/asm-x86/domain.h	Fri Aug 31 11:06:22 2007 +0100
     9.3 @@ -86,6 +86,9 @@ struct shadow_domain {
     9.4      unsigned int      free_pages;   /* number of pages on freelists */
     9.5      unsigned int      p2m_pages;    /* number of pages allocates to p2m */
     9.6  
     9.7 +    /* 1-to-1 map for use when HVM vcpus have paging disabled */
     9.8 +    pagetable_t unpaged_pagetable;
     9.9 +
    9.10      /* Shadow hashtable */
    9.11      struct shadow_page_info **hash_table;
    9.12      int hash_walking;  /* Some function is walking the hash table */
    9.13 @@ -181,8 +184,6 @@ struct paging_domain {
    9.14  struct paging_vcpu {
    9.15      /* Pointers to mode-specific entry points. */
    9.16      struct paging_mode *mode;
    9.17 -    /* HVM guest: paging enabled (CR0.PG)?  */
    9.18 -    unsigned int translate_enabled:1;
    9.19      /* HVM guest: last emulate was to a pagetable */
    9.20      unsigned int last_write_was_pt:1;
    9.21      /* Translated guest: virtual TLB */
    10.1 --- a/xen/include/asm-x86/p2m.h	Fri Aug 31 10:59:41 2007 +0100
    10.2 +++ b/xen/include/asm-x86/p2m.h	Fri Aug 31 11:06:22 2007 +0100
    10.3 @@ -61,7 +61,8 @@ static inline mfn_t gfn_to_mfn_current(u
    10.4  mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
    10.5  
    10.6  /* General conversion function from gfn to mfn */
    10.7 -static inline mfn_t gfn_to_mfn(struct domain *d, unsigned long gfn)
    10.8 +#define gfn_to_mfn(d, g) _gfn_to_mfn((d), (g))
    10.9 +static inline mfn_t _gfn_to_mfn(struct domain *d, unsigned long gfn)
   10.10  {
   10.11      if ( !paging_mode_translate(d) )
   10.12          return _mfn(gfn);
    11.1 --- a/xen/include/asm-x86/paging.h	Fri Aug 31 10:59:41 2007 +0100
    11.2 +++ b/xen/include/asm-x86/paging.h	Fri Aug 31 11:06:22 2007 +0100
    11.3 @@ -66,19 +66,6 @@
    11.4  /* flags used for paging debug */
    11.5  #define PAGING_DEBUG_LOGDIRTY 0
    11.6  
    11.7 -/******************************************************************************
    11.8 - * The equivalent for a particular vcpu of a shadowed domain. */
    11.9 -
   11.10 -/* Is this vcpu using the P2M table to translate between GFNs and MFNs?
   11.11 - *
   11.12 - * This is true of translated HVM domains on a vcpu which has paging
   11.13 - * enabled.  (HVM vcpus with paging disabled are using the p2m table as
   11.14 - * its paging table, so no translation occurs in this case.)
   11.15 - * It is also true for all vcpus of translated PV domains. */
   11.16 -#define paging_vcpu_mode_translate(_v) ((_v)->arch.paging.translate_enabled)
   11.17 -
   11.18 -
   11.19 -
   11.20  /*****************************************************************************
   11.21   * Mode-specific entry points into the shadow code.  
   11.22   *
   11.23 @@ -222,9 +209,6 @@ static inline int paging_invlpg(struct v
   11.24  #define INVALID_GFN (-1UL)
   11.25  static inline unsigned long paging_gva_to_gfn(struct vcpu *v, unsigned long va)
   11.26  {
   11.27 -    if ( unlikely(!paging_vcpu_mode_translate(v)) )
   11.28 -        return va >> PAGE_SHIFT;
   11.29 -
   11.30      return v->arch.paging.mode->gva_to_gfn(v, va);
   11.31  }
   11.32
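
A note on the gfn_to_mfn() changes in p2m.h and shadow/types.h above: the generic lookup becomes an inline _gfn_to_mfn() behind a gfn_to_mfn() macro, which lets the shadow code re-override the macro so its callers can pass the wrapped gfn_t type directly. Below is a minimal sketch of that override pattern; the stand-in types, the stubbed translation, and main() are hypothetical scaffolding, only the macro shape mirrors the hunks above.

    #include <stdio.h>

    typedef struct { unsigned long m; } mfn_t;       /* stand-in wrapped types */
    typedef struct { unsigned long g; } gfn_t;
    #define mfn_x(x)  ((x).m)
    #define gfn_x(x)  ((x).g)
    #define _mfn(x)   ((mfn_t){ (x) })
    #define _gfn(x)   ((gfn_t){ (x) })

    struct domain { int translated; };

    /* Generic interface (cf. p2m.h): takes a raw unsigned long gfn. */
    static inline mfn_t _gfn_to_mfn(struct domain *d, unsigned long gfn)
    {
        if ( !d->translated )
            return _mfn(gfn);           /* non-translated guests: gfn == mfn */
        return _mfn(gfn ^ 0x80000);     /* stub standing in for the p2m walk */
    }
    #define gfn_to_mfn(d, g) _gfn_to_mfn((d), (g))

    /* Shadow code (cf. shadow/types.h): same name, but callers pass gfn_t. */
    #undef gfn_to_mfn
    #define gfn_to_mfn(d, g) _gfn_to_mfn((d), gfn_x(g))

    int main(void)
    {
        struct domain d = { .translated = 1 };
        mfn_t m = gfn_to_mfn(&d, _gfn(42));   /* gfn_t is unwrapped by the macro */
        printf("mfn = %#lx\n", mfn_x(m));
        return 0;
    }

This is why call sites such as guest_walk_tables() can switch from vcpu_gfn_to_mfn(v, gfn) to gfn_to_mfn(d, gfn) in the multi.c hunks without unwrapping the gfn_t at each call.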