ia64/xen-unstable

changeset 9696:0267063e050c

This is the initial patch for SMP PAE guest support on x86-64 Xen.
With vcpus=2, an SMP PAE guest can complete a kernel build successfully.
The patch also improves the stability of SMP guests.
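
A note on the mechanism: a 32-bit PAE guest's PDPT is only 32 bytes, so
up to 128 of them (PAE_CR3_IDX_NO) can share a single page. The patch
therefore keys shadow-status lookups on both the guest frame and the
PDPT's slot within that frame, via the new PGT_pae_idx_shift rather than
overloading PGT_score_shift. A minimal sketch of the encoding, using
get_cr3_idxval() and PGT_pae_idx_shift from the patched sources
(pae_shadow_key() itself is hypothetical; the patch open-codes this):

    /*
     * Fold the PAE CR3 index into the gpfn used as the shadow hash
     * key, so each 32-byte PDPT on a shared page gets its own
     * PGT_l4_shadow entry.
     */
    static unsigned long pae_shadow_key(struct vcpu *v, unsigned long gpfn)
    {
        u32 idx = get_cr3_idxval(v);   /* PDPT slot within the page */
        return ((unsigned long)idx << PGT_pae_idx_shift) | gpfn;
    }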

Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Xiaohui Xin <xiaohui.xin@intel.com>
author kaf24@firebug.cl.cam.ac.uk
date Thu Apr 13 10:31:53 2006 +0100 (2006-04-13)
parents c3bb51c443a7
children 790f7a0be478
files xen/arch/x86/Makefile xen/arch/x86/shadow.c xen/arch/x86/shadow_public.c xen/include/asm-x86/mm.h xen/include/asm-x86/shadow_64.h
line diff
     1.1 --- a/xen/arch/x86/Makefile	Thu Apr 13 10:29:27 2006 +0100
     1.2 +++ b/xen/arch/x86/Makefile	Thu Apr 13 10:31:53 2006 +0100
     1.3 @@ -76,6 +76,7 @@ boot/mkelf32: boot/mkelf32.c
     1.4  	$(HOSTCC) $(HOSTCFLAGS) -o $@ $<
     1.5  
     1.6  shadow_guest32.o: shadow.c
     1.7 +shadow_guest32pae.o: shadow.c
     1.8  
     1.9  .PHONY: clean
    1.10  clean::
     2.1 --- a/xen/arch/x86/shadow.c	Thu Apr 13 10:29:27 2006 +0100
     2.2 +++ b/xen/arch/x86/shadow.c	Thu Apr 13 10:31:53 2006 +0100
     2.3 @@ -1531,14 +1531,10 @@ static void resync_pae_guest_l3(struct d
     2.4  
     2.5          idx = get_cr3_idxval(v);
     2.6          smfn = __shadow_status(
     2.7 -            d, ((unsigned long)(idx << PGT_score_shift) | entry->gpfn), PGT_l4_shadow);
     2.8 -
     2.9 -#ifndef NDEBUG
    2.10 +            d, ((unsigned long)(idx << PGT_pae_idx_shift) | entry->gpfn), PGT_l4_shadow);
    2.11 +
    2.12          if ( !smfn ) 
    2.13 -        {
    2.14 -            BUG();
    2.15 -        }
    2.16 -#endif
    2.17 +            continue;
    2.18  
    2.19          guest    = (pgentry_64_t *)map_domain_page(entry->gmfn);
    2.20          snapshot = (pgentry_64_t *)map_domain_page(entry->snapshot_mfn);
    2.21 @@ -1550,9 +1546,35 @@ static void resync_pae_guest_l3(struct d
    2.22              if ( entry_has_changed(
    2.23                      guest[index], snapshot[index], PAGE_FLAG_MASK) ) 
    2.24              {
    2.25 +                unsigned long gpfn;
    2.26 +
    2.27 +                /*
    2.28 +                 * Looks like it's no longer a page table. 
    2.29 +                 */
    2.30 +                if ( unlikely(entry_get_value(guest[index]) & PAE_PDPT_RESERVED) )
    2.31 +                {
    2.32 +                    if ( entry_get_flags(shadow_l3[i]) & _PAGE_PRESENT )
    2.33 +                        put_shadow_ref(entry_get_pfn(shadow_l3[i]));
    2.34 +
    2.35 +                    shadow_l3[i] = entry_empty();
    2.36 +                    continue;
    2.37 +                }
    2.38 +
    2.39 +                gpfn = entry_get_pfn(guest[index]);
    2.40 +
    2.41 +                if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) )
    2.42 +                {
    2.43 +                    if ( entry_get_flags(shadow_l3[i]) & _PAGE_PRESENT )
    2.44 +                        put_shadow_ref(entry_get_pfn(shadow_l3[i]));
    2.45 +
    2.46 +                    shadow_l3[i] = entry_empty();
    2.47 +                    continue;
    2.48 +                }
    2.49 +
    2.50                  validate_entry_change(d, &guest[index],
    2.51                                        &shadow_l3[i], PAGING_L3);
    2.52              }
    2.53 +
    2.54              if ( entry_get_value(guest[index]) != 0 )
    2.55                  max = i;
    2.56  
    2.57 @@ -1676,6 +1698,19 @@ static int resync_all(struct domain *d, 
    2.58                  {
    2.59                      int error;
    2.60  
    2.61 +#if CONFIG_PAGING_LEVELS == 4
    2.62 +                    unsigned long gpfn;
    2.63 +
    2.64 +                    gpfn = guest_l1e_get_paddr(guest1[i]) >> PAGE_SHIFT;
    2.65 +
    2.66 +                    if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) )
    2.67 +                    {
    2.68 +                        guest_l1_pgentry_t tmp_gl1e = guest_l1e_empty();
    2.69 +                        validate_pte_change(d, tmp_gl1e, sl1e_p);
    2.70 +                        continue;
    2.71 +                    }
    2.72 +#endif
    2.73 +
    2.74                      error = validate_pte_change(d, guest1[i], sl1e_p);
    2.75                      if ( error ==  -1 )
    2.76                          unshadow_l1 = 1;
    2.77 @@ -1698,6 +1733,7 @@ static int resync_all(struct domain *d, 
    2.78              perfc_incrc(resync_l1);
    2.79              perfc_incr_histo(wpt_updates, changed, PT_UPDATES);
    2.80              perfc_incr_histo(l1_entries_checked, max_shadow - min_shadow + 1, PT_UPDATES);
    2.81 +
    2.82              if ( d->arch.ops->guest_paging_levels >= PAGING_L3 &&
    2.83                   unshadow_l1 ) {
    2.84                  pgentry_64_t l2e = { 0 };
    2.85 @@ -1804,18 +1840,22 @@ static int resync_all(struct domain *d, 
    2.86              for ( i = min_shadow; i <= max_shadow; i++ )
    2.87              {
    2.88                  if ( (i < min_snapshot) || (i > max_snapshot) ||
    2.89 -                  entry_has_changed(
    2.90 -                      guest_pt[i], snapshot_pt[i], PAGE_FLAG_MASK) )
    2.91 +                    entry_has_changed(
    2.92 +                        guest_pt[i], snapshot_pt[i], PAGE_FLAG_MASK) )
    2.93                  {
    2.94 -
    2.95                      unsigned long gpfn;
    2.96  
    2.97                      gpfn = entry_get_pfn(guest_pt[i]);
    2.98                      /*
    2.99 -                     * Looks like it's longer a page table.
   2.100 +                     * Looks like it's no longer a page table.
   2.101                       */
   2.102                      if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) )
   2.103 +                    {
   2.104 +                        if ( entry_get_flags(shadow_pt[i]) & _PAGE_PRESENT )
   2.105 +                            put_shadow_ref(entry_get_pfn(shadow_pt[i]));
    2.106 +                        shadow_pt[i] = entry_empty();
   2.107                          continue;
   2.108 +                    }
   2.109  
   2.110                      need_flush |= validate_entry_change(
   2.111                          d, &guest_pt[i], &shadow_pt[i],
   2.112 @@ -1864,11 +1904,17 @@ static int resync_all(struct domain *d, 
   2.113                      unsigned long gpfn;
   2.114  
   2.115                      gpfn = l4e_get_pfn(new_root_e);
   2.116 +
   2.117                      /*
   2.118 -                     * Looks like it's longer a page table.
   2.119 +                     * Looks like it's no longer a page table.
   2.120                       */
   2.121                      if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) )
   2.122 +                    {
   2.123 +                        if ( l4e_get_flags(shadow4[i]) & _PAGE_PRESENT )
   2.124 +                            put_shadow_ref(l4e_get_pfn(shadow4[i]));
   2.125 +                        shadow4[i] = l4e_empty(); 
   2.126                          continue;
   2.127 +                    }
   2.128  
   2.129                      if ( d->arch.ops->guest_paging_levels == PAGING_L4 ) 
   2.130                      {
   2.131 @@ -2372,7 +2418,7 @@ static void shadow_update_pagetables(str
   2.132      if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 ) 
   2.133      {
   2.134          u32 index = get_cr3_idxval(v);
   2.135 -        gpfn = (index << PGT_score_shift) | gpfn;
   2.136 +        gpfn = ((unsigned long)index << PGT_pae_idx_shift) | gpfn;
   2.137      }
   2.138  #endif
   2.139  
   2.140 @@ -3233,8 +3279,35 @@ update_top_level_shadow(struct vcpu *v, 
   2.141      int i;
   2.142  
   2.143      for ( i = 0; i < PAE_L3_PAGETABLE_ENTRIES; i++ )
   2.144 +    {
   2.145 +        unsigned long gpfn;
   2.146 +
   2.147 +        /*
   2.148 +         * Looks like it's no longer a page table. 
   2.149 +         */
   2.150 +        if ( unlikely(entry_get_value(gple[index*4+i]) & PAE_PDPT_RESERVED) )
   2.151 +        {
   2.152 +            if ( entry_get_flags(sple[i]) & _PAGE_PRESENT )
   2.153 +                put_shadow_ref(entry_get_pfn(sple[i]));
   2.154 +
   2.155 +            sple[i] = entry_empty();
   2.156 +            continue;
   2.157 +        }
   2.158 +
   2.159 +        gpfn = entry_get_pfn(gple[index*4+i]);
   2.160 +
   2.161 +        if ( unlikely(gpfn != (gpfn & PGT_mfn_mask)) )
   2.162 +        {
   2.163 +            if ( entry_get_flags(sple[i]) & _PAGE_PRESENT )
   2.164 +                put_shadow_ref(entry_get_pfn(sple[i]));
   2.165 +
   2.166 +            sple[i] = entry_empty();
   2.167 +            continue;
   2.168 +        }
   2.169 +
   2.170          validate_entry_change(
   2.171              v->domain, &gple[index*4+i], &sple[i], PAGING_L3);
   2.172 +    }
   2.173  
   2.174      unmap_domain_page(sple);
   2.175  }
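
The shadow.c hunks above repeat a common validation pattern at each
resync point (resync_pae_guest_l3(), the loops in resync_all(), and
update_top_level_shadow()): before propagating a guest entry into the
shadow, drop entries whose frame number overflows PGT_mfn_mask and,
for PAE L3/PDPT entries, those with reserved bits set, releasing the
stale shadow reference instead of hitting BUG(). A sketch of the
pattern (guest_l3_entry_stale() is a hypothetical helper; the patch
open-codes these checks at each site):

    /*
     * Return 1 if the guest L3 entry can no longer be a page table,
     * in which case the caller drops any existing shadow reference
     * with put_shadow_ref() and writes entry_empty() into the slot.
     */
    static int guest_l3_entry_stale(pgentry_64_t ge)
    {
        unsigned long gpfn = entry_get_pfn(ge);

        if ( entry_get_value(ge) & PAE_PDPT_RESERVED ) /* reserved bits set */
            return 1;
        if ( gpfn != (gpfn & PGT_mfn_mask) )           /* frame out of range */
            return 1;
        return 0;
    }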
     3.1 --- a/xen/arch/x86/shadow_public.c	Thu Apr 13 10:29:27 2006 +0100
     3.2 +++ b/xen/arch/x86/shadow_public.c	Thu Apr 13 10:31:53 2006 +0100
     3.3 @@ -102,6 +102,15 @@ void free_shadow_pages(struct domain *d)
     3.4  
     3.5  int shadow_set_guest_paging_levels(struct domain *d, int levels)
     3.6  {
     3.7 +    struct vcpu *v = current;
     3.8 +
     3.9 +    /*
    3.10 +     * Need to wait for VCPU0 to complete the on-going shadow ops.
    3.11 +     */
    3.12 +
    3.13 +    if ( v->vcpu_id )
    3.14 +        return 1;
    3.15 +
    3.16      shadow_lock(d);
    3.17  
    3.18      switch(levels) {
    3.19 @@ -692,7 +701,6 @@ void free_fake_shadow_l2(struct domain *
    3.20  void free_shadow_page(unsigned long smfn)
    3.21  {
    3.22      struct page_info *page = mfn_to_page(smfn);
    3.23 -
    3.24      unsigned long gmfn = page->u.inuse.type_info & PGT_mfn_mask;
    3.25      struct domain *d = page_get_owner(mfn_to_page(gmfn));
    3.26      unsigned long gpfn = mfn_to_gmfn(d, gmfn);
    3.27 @@ -709,10 +717,9 @@ void free_shadow_page(unsigned long smfn
    3.28          if ( !mfn )
    3.29              gpfn |= (1UL << 63);
    3.30      }
    3.31 -    if (d->arch.ops->guest_paging_levels == PAGING_L3)
    3.32 -        if (type == PGT_l4_shadow ) {
    3.33 -            gpfn = ((unsigned long)page->tlbflush_timestamp << PGT_score_shift) | gpfn;
    3.34 -        }
    3.35 +    if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
    3.36 +        if ( type == PGT_l4_shadow ) 
    3.37 +            gpfn = ((unsigned long)page->tlbflush_timestamp << PGT_pae_idx_shift) | gpfn;
    3.38  #endif
    3.39  
    3.40      delete_shadow_status(d, gpfn, gmfn, type);
    3.41 @@ -743,9 +750,24 @@ void free_shadow_page(unsigned long smfn
    3.42  #if CONFIG_PAGING_LEVELS >= 3
    3.43      case PGT_l2_shadow:
    3.44      case PGT_l3_shadow:
    3.45 +        shadow_demote(d, gpfn, gmfn);
    3.46 +        free_shadow_tables(d, smfn, shadow_type_to_level(type));
    3.47 +        d->arch.shadow_page_count--;
    3.48 +        break;
    3.49 +
    3.50      case PGT_l4_shadow:
    3.51          gpfn = gpfn & PGT_mfn_mask;
    3.52 -        shadow_demote(d, gpfn, gmfn);
    3.53 +        if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
    3.54 +        {
    3.55 +            /*
    3.56 +             * Since a single PDPT page can have multiple PDPs, it's possible
    3.57 +             * that shadow_demote() has been already called for gmfn.
    3.58 +             */
    3.59 +            if ( mfn_is_page_table(gmfn) )
    3.60 +                shadow_demote(d, gpfn, gmfn);
    3.61 +        } else
    3.62 +            shadow_demote(d, gpfn, gmfn);
    3.63 +
    3.64          free_shadow_tables(d, smfn, shadow_type_to_level(type));
    3.65          d->arch.shadow_page_count--;
    3.66          break;
    3.67 @@ -2041,7 +2063,16 @@ void shadow_sync_and_drop_references(
    3.68  
    3.69  void clear_all_shadow_status(struct domain *d)
    3.70  {
    3.71 +    struct vcpu *v = current;
    3.72 +
    3.73 +    /*
    3.74 +     * Don't clean up while other vcpus are working.
    3.75 +     */
    3.76 +    if ( v->vcpu_id )
    3.77 +        return;
    3.78 +
    3.79      shadow_lock(d);
    3.80 +
    3.81      free_shadow_pages(d);
    3.82      free_shadow_ht_entries(d);
    3.83      d->arch.shadow_ht = 
    3.84 @@ -2054,6 +2085,7 @@ void clear_all_shadow_status(struct doma
    3.85             shadow_ht_buckets * sizeof(struct shadow_status));
    3.86  
    3.87      free_out_of_sync_entries(d);
    3.88 +
    3.89      shadow_unlock(d);
    3.90  }
    3.91  
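The shadow_public.c changes also serialize global shadow operations on
the boot VCPU: shadow_set_guest_paging_levels() and
clear_all_shadow_status() now return early for any nonzero vcpu_id, so
secondary VCPUs never race VCPU0's teardown of shared shadow state. A
sketch of the gating pattern (shadow_global_op() is a hypothetical
stand-in for either function):

    static void shadow_global_op(struct domain *d)
    {
        /* Only the boot VCPU mutates global shadow state. */
        if ( current->vcpu_id != 0 )
            return;

        shadow_lock(d);
        /* ... free/rebuild domain-wide shadow structures ... */
        shadow_unlock(d);
    }
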
     4.1 --- a/xen/include/asm-x86/mm.h	Thu Apr 13 10:29:27 2006 +0100
     4.2 +++ b/xen/include/asm-x86/mm.h	Thu Apr 13 10:31:53 2006 +0100
     4.3 @@ -103,11 +103,13 @@ struct page_info
     4.4  #define PGT_high_mfn_mask   (0xfffUL << PGT_high_mfn_shift)
     4.5  #define PGT_mfn_mask        (((1U<<23)-1) | PGT_high_mfn_mask)
     4.6  #define PGT_high_mfn_nx     (0x800UL << PGT_high_mfn_shift)
     4.7 +#define PGT_pae_idx_shift   PGT_high_mfn_shift
     4.8  #else
     4.9   /* 23-bit mfn mask for shadow types: good for up to 32GB RAM. */
    4.10  #define PGT_mfn_mask        ((1U<<23)-1)
    4.11   /* NX for PAE xen is not supported yet */
    4.12  #define PGT_high_mfn_nx     (1ULL << 63)
    4.13 +#define PGT_pae_idx_shift   23
    4.14  #endif
    4.15  
    4.16  #define PGT_score_shift     23
     5.1 --- a/xen/include/asm-x86/shadow_64.h	Thu Apr 13 10:29:27 2006 +0100
     5.2 +++ b/xen/include/asm-x86/shadow_64.h	Thu Apr 13 10:31:53 2006 +0100
     5.3 @@ -119,6 +119,8 @@ typedef struct { intpte_t lo; } pgentry_
     5.4  #define PAE_CR3_IDX_MASK    0x7f
     5.5  #define PAE_CR3_IDX_NO      128
     5.6  
     5.7 +#define PAE_PDPT_RESERVED   0x1e6 /* bits [8:5], [2:1] */
     5.8 +
     5.9  /******************************************************************************/
    5.10  static inline int  table_offset_64(unsigned long va, int level)
    5.11  {
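
For reference, PAE_PDPT_RESERVED (0x1e6 = 0b1_1110_0110) covers bits
8:5 and 2:1, the bits that must be zero in a valid PAE
page-directory-pointer-table entry. An illustrative check, not part of
the patch:

    /*
     * A guest L3 value with any reserved PDPT bit set cannot be a
     * valid PDPT entry; the resync paths then treat it as "no longer
     * a page table" and empty the corresponding shadow slot.
     */
    static inline int pae_pdpt_reserved_set(intpte_t val)
    {
        return (val & PAE_PDPT_RESERVED) != 0;
    }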