ia64/xen-unstable

changeset 16130:415beae13d06

PV guests don't require non-zero-order pages for shadowing, so lift the
requirement that such pages be available for allocation when enabling
shadow mode. This removes the potential for live migration to fail due
to fragmented memory.

This has seen only light testing so far.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Tim Deegan <Tim.Deegan@xensource.com>
date Mon Oct 15 09:28:14 2007 +0100 (2007-10-15)
parents a76f3f7ddca0
children c918a68617c9
files xen/arch/x86/mm/shadow/common.c xen/arch/x86/mm/shadow/multi.c xen/arch/x86/mm/shadow/private.h
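
The heart of the change is that the maximum allocation order becomes a per-domain property: HVM domains keep using SHADOW_MAX_ORDER chunks, while PV domains only ever need order-0 pages, so enabling shadow mode for them no longer depends on contiguous free memory. The stand-alone sketch below is an illustration only, not code from the patch; the demo_* names, MAX_ORDER and the free[] counters are simplified stand-ins for Xen's SHADOW_MAX_ORDER, is_hvm_domain() and the per-order freelists. It shows the per-domain order cap and the counting logic of the new space_is_available(), including the halving of count at each higher order (one order-n+1 chunk covers two order-n requests). The full line diff follows.

#include <stdbool.h>
#include <stdio.h>

#define MAX_ORDER 2                        /* stand-in for SHADOW_MAX_ORDER */

struct demo_domain {
    bool is_hvm;                           /* stand-in for is_hvm_domain() */
    unsigned int free[MAX_ORDER + 1];      /* free chunks per order */
};

/* HVM domains may need multi-page (order > 0) shadows; PV domains never do. */
static unsigned int demo_max_order(const struct demo_domain *d)
{
    return d->is_hvm ? MAX_ORDER : 0;
}

/* Are 'count' pages of the requested order available?  A free chunk of the
 * next order up can serve two chunks of the current order, hence 'count' is
 * halved (rounded up) on each iteration -- mirroring space_is_available(). */
static bool demo_space_is_available(const struct demo_domain *d,
                                    unsigned int order, unsigned int count)
{
    for ( ; order <= demo_max_order(d); ++order )
    {
        if ( d->free[order] >= count )
            return true;
        count = (count + 1) >> 1;
    }
    return false;
}

int main(void)
{
    /* PV domain, two order-0 pages free: order-0 requests succeed and no
     * order above 0 is ever asked for, so fragmentation cannot matter. */
    struct demo_domain pv  = { .is_hvm = false, .free = { 2, 0, 0 } };

    /* HVM domain, only one order-2 chunk free: it still satisfies a request
     * for three order-0 pages, because one order-2 chunk covers four. */
    struct demo_domain hvm = { .is_hvm = true,  .free = { 0, 0, 1 } };

    printf("PV,  2 x order-0 wanted: %d\n", demo_space_is_available(&pv, 0, 2));
    printf("HVM, 3 x order-0 wanted: %d\n", demo_space_is_available(&hvm, 0, 3));
    return 0;
}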
line diff
     1.1 --- a/xen/arch/x86/mm/shadow/common.c	Sat Oct 13 08:44:06 2007 +0100
     1.2 +++ b/xen/arch/x86/mm/shadow/common.c	Mon Oct 15 09:28:14 2007 +0100
     1.3 @@ -708,15 +708,29 @@ shadow_order(unsigned int shadow_type)
     1.4  #endif
     1.5  }
     1.6  
     1.7 -
     1.8 -/* Do we have a free chunk of at least this order? */
     1.9 -static inline int chunk_is_available(struct domain *d, int order)
    1.10 +static inline unsigned int
    1.11 +shadow_max_order(struct domain *d)
    1.12  {
    1.13 -    int i;
    1.14 -    
    1.15 -    for ( i = order; i <= SHADOW_MAX_ORDER; i++ )
    1.16 -        if ( !list_empty(&d->arch.paging.shadow.freelists[i]) )
    1.17 -            return 1;
    1.18 +    return is_hvm_domain(d) ? SHADOW_MAX_ORDER : 0;
    1.19 +}
    1.20 +
     1.21 +/* Do we have a total of count pages of the requested order free? */
    1.22 +static inline int space_is_available(
    1.23 +    struct domain *d,
    1.24 +    unsigned int order,
    1.25 +    unsigned int count)
    1.26 +{
    1.27 +    for ( ; order <= shadow_max_order(d); ++order )
    1.28 +    {
    1.29 +        unsigned int n = count;
    1.30 +        const struct list_head *p;
    1.31 +
    1.32 +        list_for_each ( p, &d->arch.paging.shadow.freelists[order] )
    1.33 +            if ( --n == 0 )
    1.34 +                return 1;
    1.35 +        count = (count + 1) >> 1;
    1.36 +    }
    1.37 +
    1.38      return 0;
    1.39  }
    1.40  
    1.41 @@ -752,12 +766,12 @@ static void shadow_unhook_mappings(struc
    1.42  }
    1.43  
    1.44  
    1.45 -/* Make sure there is at least one chunk of the required order available
    1.46 - * in the shadow page pool. This must be called before any calls to
    1.47 - * shadow_alloc().  Since this will free existing shadows to make room,
    1.48 - * it must be called early enough to avoid freeing shadows that the
    1.49 - * caller is currently working on. */
    1.50 -void shadow_prealloc(struct domain *d, unsigned int order)
    1.51 +/* Make sure there are at least count order-sized pages
    1.52 + * available in the shadow page pool. */
    1.53 +static void _shadow_prealloc(
    1.54 +    struct domain *d,
    1.55 +    unsigned int order,
    1.56 +    unsigned int count)
    1.57  {
     1.58      /* Need a vcpu for calling unpins; for now, since we don't have
    1.59       * per-vcpu shadows, any will do */
    1.60 @@ -768,7 +782,8 @@ void shadow_prealloc(struct domain *d, u
    1.61      mfn_t smfn;
    1.62      int i;
    1.63  
    1.64 -    if ( chunk_is_available(d, order) ) return; 
    1.65 +    ASSERT(order <= shadow_max_order(d));
    1.66 +    if ( space_is_available(d, order, count) ) return;
    1.67      
    1.68      v = current;
    1.69      if ( v->domain != d )
    1.70 @@ -785,8 +800,8 @@ void shadow_prealloc(struct domain *d, u
    1.71          /* Unpin this top-level shadow */
    1.72          sh_unpin(v, smfn);
    1.73  
    1.74 -        /* See if that freed up a chunk of appropriate size */
    1.75 -        if ( chunk_is_available(d, order) ) return;
    1.76 +        /* See if that freed up enough space */
    1.77 +        if ( space_is_available(d, order, count) ) return;
    1.78      }
    1.79  
    1.80      /* Stage two: all shadow pages are in use in hierarchies that are
    1.81 @@ -803,8 +818,8 @@ void shadow_prealloc(struct domain *d, u
    1.82                                 pagetable_get_mfn(v2->arch.shadow_table[i]));
    1.83                  cpus_or(flushmask, v2->vcpu_dirty_cpumask, flushmask);
    1.84  
    1.85 -                /* See if that freed up a chunk of appropriate size */
    1.86 -                if ( chunk_is_available(d, order) ) 
    1.87 +                /* See if that freed up enough space */
    1.88 +                if ( space_is_available(d, order, count) )
    1.89                  {
    1.90                      flush_tlb_mask(flushmask);
    1.91                      return;
    1.92 @@ -814,15 +829,26 @@ void shadow_prealloc(struct domain *d, u
    1.93      
    1.94      /* Nothing more we can do: all remaining shadows are of pages that
    1.95       * hold Xen mappings for some vcpu.  This can never happen. */
    1.96 -    SHADOW_ERROR("Can't pre-allocate %i shadow pages!\n"
    1.97 +    SHADOW_ERROR("Can't pre-allocate %u order-%u shadow pages!\n"
    1.98                   "  shadow pages total = %u, free = %u, p2m=%u\n",
    1.99 -                 1 << order,
   1.100 +                 count, order,
   1.101                   d->arch.paging.shadow.total_pages,
   1.102                   d->arch.paging.shadow.free_pages,
   1.103                   d->arch.paging.shadow.p2m_pages);
   1.104      BUG();
   1.105  }
   1.106  
    1.107 +/* Make sure there are at least count pages of the order corresponding to
   1.108 + * type available in the shadow page pool.
   1.109 + * This must be called before any calls to shadow_alloc().  Since this
   1.110 + * will free existing shadows to make room, it must be called early enough
   1.111 + * to avoid freeing shadows that the caller is currently working on. */
   1.112 +void shadow_prealloc(struct domain *d, u32 type, unsigned int count)
   1.113 +{
   1.114 +    ASSERT(type != SH_type_p2m_table);
   1.115 +    return _shadow_prealloc(d, shadow_order(type), count);
   1.116 +}
   1.117 +
   1.118  /* Deliberately free all the memory we can: this will tear down all of
   1.119   * this domain's shadows */
   1.120  static void shadow_blow_tables(struct domain *d) 
   1.121 @@ -899,7 +925,9 @@ mfn_t shadow_alloc(struct domain *d,
   1.122      int i;
   1.123  
   1.124      ASSERT(shadow_locked_by_me(d));
   1.125 -    ASSERT(order <= SHADOW_MAX_ORDER);
   1.126 +    if (shadow_type == SH_type_p2m_table && order > shadow_max_order(d))
   1.127 +        order = shadow_max_order(d);
   1.128 +    ASSERT(order <= shadow_max_order(d));
   1.129      ASSERT(shadow_type != SH_type_none);
   1.130      perfc_incr(shadow_alloc);
   1.131  
   1.132 @@ -1000,7 +1028,7 @@ void shadow_free(struct domain *d, mfn_t
   1.133      }
   1.134  
   1.135      /* Merge chunks as far as possible. */
   1.136 -    while ( order < SHADOW_MAX_ORDER )
   1.137 +    for ( ; order < shadow_max_order(d); ++order )
   1.138      {
   1.139          mask = 1 << order;
   1.140          if ( (mfn_x(shadow_page_to_mfn(sp)) & mask) ) {
   1.141 @@ -1015,7 +1043,6 @@ void shadow_free(struct domain *d, mfn_t
   1.142                  break;
   1.143              list_del(&(sp+mask)->list);
   1.144          }
   1.145 -        order++;
   1.146      }
   1.147  
   1.148      sp->order = order;
   1.149 @@ -1037,16 +1064,18 @@ sh_alloc_p2m_pages(struct domain *d)
   1.150  {
   1.151      struct page_info *pg;
   1.152      u32 i;
   1.153 +    unsigned int order = shadow_max_order(d);
   1.154 +
   1.155      ASSERT(shadow_locked_by_me(d));
   1.156      
   1.157      if ( d->arch.paging.shadow.total_pages 
   1.158 -         < (shadow_min_acceptable_pages(d) + (1<<SHADOW_MAX_ORDER)) )
   1.159 +         < (shadow_min_acceptable_pages(d) + (1 << order)) )
   1.160          return 0; /* Not enough shadow memory: need to increase it first */
   1.161      
   1.162      pg = mfn_to_page(shadow_alloc(d, SH_type_p2m_table, 0));
   1.163 -    d->arch.paging.shadow.p2m_pages += (1<<SHADOW_MAX_ORDER);
   1.164 -    d->arch.paging.shadow.total_pages -= (1<<SHADOW_MAX_ORDER);
   1.165 -    for (i = 0; i < (1<<SHADOW_MAX_ORDER); i++)
   1.166 +    d->arch.paging.shadow.p2m_pages += (1 << order);
   1.167 +    d->arch.paging.shadow.total_pages -= (1 << order);
   1.168 +    for (i = 0; i < (1U << order); i++)
   1.169      {
   1.170          /* Unlike shadow pages, mark p2m pages as owned by the domain.
   1.171           * Marking the domain as the owner would normally allow the guest to
   1.172 @@ -1166,7 +1195,7 @@ static unsigned int sh_set_allocation(st
   1.173  {
   1.174      struct shadow_page_info *sp;
   1.175      unsigned int lower_bound;
   1.176 -    int j;
   1.177 +    unsigned int j, order = shadow_max_order(d);
   1.178  
   1.179      ASSERT(shadow_locked_by_me(d));
   1.180      
   1.181 @@ -1187,15 +1216,15 @@ static unsigned int sh_set_allocation(st
   1.182          {
   1.183              /* Need to allocate more memory from domheap */
   1.184              sp = (struct shadow_page_info *)
   1.185 -                alloc_domheap_pages(NULL, SHADOW_MAX_ORDER, 0); 
   1.186 +                alloc_domheap_pages(NULL, order, 0);
   1.187              if ( sp == NULL ) 
   1.188              { 
   1.189                  SHADOW_PRINTK("failed to allocate shadow pages.\n");
   1.190                  return -ENOMEM;
   1.191              }
   1.192 -            d->arch.paging.shadow.free_pages += 1<<SHADOW_MAX_ORDER;
   1.193 -            d->arch.paging.shadow.total_pages += 1<<SHADOW_MAX_ORDER;
   1.194 -            for ( j = 0; j < 1<<SHADOW_MAX_ORDER; j++ ) 
   1.195 +            d->arch.paging.shadow.free_pages += 1 << order;
   1.196 +            d->arch.paging.shadow.total_pages += 1 << order;
   1.197 +            for ( j = 0; j < 1U << order; j++ )
   1.198              {
   1.199                  sp[j].type = 0;  
   1.200                  sp[j].pinned = 0;
   1.201 @@ -1203,21 +1232,20 @@ static unsigned int sh_set_allocation(st
   1.202                  sp[j].mbz = 0;
   1.203                  sp[j].tlbflush_timestamp = 0; /* Not in any TLB */
   1.204              }
   1.205 -            sp->order = SHADOW_MAX_ORDER;
   1.206 -            list_add_tail(&sp->list, 
   1.207 -                          &d->arch.paging.shadow.freelists[SHADOW_MAX_ORDER]);
   1.208 +            sp->order = order;
   1.209 +            list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[order]);
   1.210          } 
   1.211          else if ( d->arch.paging.shadow.total_pages > pages ) 
   1.212          {
   1.213              /* Need to return memory to domheap */
   1.214 -            shadow_prealloc(d, SHADOW_MAX_ORDER);
   1.215 -            ASSERT(!list_empty(&d->arch.paging.shadow.freelists[SHADOW_MAX_ORDER]));
   1.216 -            sp = list_entry(d->arch.paging.shadow.freelists[SHADOW_MAX_ORDER].next, 
   1.217 +            _shadow_prealloc(d, order, 1);
   1.218 +            ASSERT(!list_empty(&d->arch.paging.shadow.freelists[order]));
   1.219 +            sp = list_entry(d->arch.paging.shadow.freelists[order].next,
   1.220                              struct shadow_page_info, list);
   1.221              list_del(&sp->list);
   1.222 -            d->arch.paging.shadow.free_pages -= 1<<SHADOW_MAX_ORDER;
   1.223 -            d->arch.paging.shadow.total_pages -= 1<<SHADOW_MAX_ORDER;
   1.224 -            free_domheap_pages((struct page_info *)sp, SHADOW_MAX_ORDER);
   1.225 +            d->arch.paging.shadow.free_pages -= 1 << order;
   1.226 +            d->arch.paging.shadow.total_pages -= 1 << order;
   1.227 +            free_domheap_pages((struct page_info *)sp, order);
   1.228          }
   1.229  
   1.230          /* Check to see if we need to yield and try again */
     2.1 --- a/xen/arch/x86/mm/shadow/multi.c	Sat Oct 13 08:44:06 2007 +0100
     2.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Mon Oct 15 09:28:14 2007 +0100
     2.3 @@ -1690,7 +1690,7 @@ sh_make_monitor_table(struct vcpu *v)
     2.4      ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0);
     2.5      
     2.6      /* Guarantee we can get the memory we need */
     2.7 -    shadow_prealloc(d, SHADOW_MAX_ORDER);
     2.8 +    shadow_prealloc(d, SH_type_monitor_table, CONFIG_PAGING_LEVELS - 1);
     2.9  
    2.10  #if CONFIG_PAGING_LEVELS == 4    
    2.11      {
    2.12 @@ -2827,10 +2827,13 @@ static int sh_page_fault(struct vcpu *v,
    2.13      }
    2.14  
    2.15      /* Make sure there is enough free shadow memory to build a chain of
    2.16 -     * shadow tables: one SHADOW_MAX_ORDER chunk will always be enough
    2.17 -     * to allocate all we need.  (We never allocate a top-level shadow
    2.18 -     * on this path, only a 32b l1, pae l2+1 or 64b l3+2+1) */
    2.19 -    shadow_prealloc(d, SHADOW_MAX_ORDER);
    2.20 +     * shadow tables. (We never allocate a top-level shadow on this path,
    2.21 +     * only a 32b l1, pae l1, or 64b l3+2+1. Note that while
    2.22 +     * SH_type_l1_shadow isn't correct in the latter case, all page
    2.23 +     * tables are the same size there.) */
    2.24 +    shadow_prealloc(d,
    2.25 +                    SH_type_l1_shadow,
    2.26 +                    GUEST_PAGING_LEVELS < 4 ? 1 : GUEST_PAGING_LEVELS - 1);
    2.27  
    2.28      /* Acquire the shadow.  This must happen before we figure out the rights 
    2.29       * for the shadow entry, since we might promote a page here. */
    2.30 @@ -3444,7 +3447,7 @@ sh_set_toplevel_shadow(struct vcpu *v,
    2.31      if ( !mfn_valid(smfn) )
    2.32      {
    2.33          /* Make sure there's enough free shadow memory. */
    2.34 -        shadow_prealloc(d, SHADOW_MAX_ORDER); 
    2.35 +        shadow_prealloc(d, root_type, 1);
    2.36          /* Shadow the page. */
    2.37          smfn = sh_make_shadow(v, gmfn, root_type);
    2.38      }
     3.1 --- a/xen/arch/x86/mm/shadow/private.h	Sat Oct 13 08:44:06 2007 +0100
     3.2 +++ b/xen/arch/x86/mm/shadow/private.h	Mon Oct 15 09:28:14 2007 +0100
     3.3 @@ -354,7 +354,7 @@ void shadow_promote(struct vcpu *v, mfn_
     3.4  void shadow_demote(struct vcpu *v, mfn_t gmfn, u32 type);
     3.5  
     3.6  /* Shadow page allocation functions */
     3.7 -void  shadow_prealloc(struct domain *d, unsigned int order);
     3.8 +void  shadow_prealloc(struct domain *d, u32 shadow_type, unsigned int count);
     3.9  mfn_t shadow_alloc(struct domain *d, 
    3.10                      u32 shadow_type,
    3.11                      unsigned long backpointer);
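
A note on the interface change visible in private.h and multi.c: callers now tell shadow_prealloc() which type of shadow they are about to build and how many, and the order is derived internally via shadow_order(); p2m allocations are excluded by the new ASSERT and have their order clamped in shadow_alloc() instead. The sketch below is an illustration only; the two helper names and the level parameters are invented for it, while GUEST_PAGING_LEVELS and CONFIG_PAGING_LEVELS are the constants the patched call sites actually use. It works through the counts those call sites request.

#include <stdio.h>

/* sh_page_fault(): no top-level shadow is built on this path, so a 4-level
 * (64b) guest needs at most an l3+l2+l1 chain of three pages, while 2-level
 * (32b) and 3-level (PAE) guests need a single l1 here. */
static unsigned int fault_path_count(unsigned int guest_paging_levels)
{
    return guest_paging_levels < 4 ? 1 : guest_paging_levels - 1;
}

/* sh_make_monitor_table(): request enough pages for the monitor table plus
 * whatever lower-level monitor pages may be built along with it. */
static unsigned int monitor_table_count(unsigned int config_paging_levels)
{
    return config_paging_levels - 1;
}

int main(void)
{
    unsigned int levels;

    for ( levels = 2; levels <= 4; ++levels )
        printf("fault path,    %u-level guest: prealloc %u page(s)\n",
               levels, fault_path_count(levels));

    for ( levels = 2; levels <= 4; ++levels )
        printf("monitor table, %u-level host:  prealloc %u page(s)\n",
               levels, monitor_table_count(levels));
    return 0;
}

Since each count is requested at the type's own order, and per the commit message PV guests only ever need order-0 pages, the shadow pool can now be drawn on page by page, and enabling shadow mode for live migration of a PV guest no longer depends on finding a free SHADOW_MAX_ORDER chunk.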