ia64/xen-unstable

changeset 18755:540483d2a98f

x86: simplify page reference handling for partially (in-)validated pages

Simplify general page reference management for preempted (partially
[in-]validated) pages: Reserve one reference that can be acquired
without the risk of overflowing the reference count, thus allowing
a simplified get_page() equivalent that cannot fail (but must be
used with care).

Doing this conversion pointed out a latent issue in the changes done
previously in this area: The extra reference must be acquired before
the 'normal' reference gets dropped, so the patch fixes this at once
in both the alloc_page_type() and free_page_type() paths (it's really
only the latter that failed to work with the change described above).

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Mon Nov 03 10:32:02 2008 +0000 (2008-11-03)
parents 303b1014f91e
children 4ec25db9326a
files xen/arch/x86/mm.c
line diff
     1.1 --- a/xen/arch/x86/mm.c	Mon Nov 03 10:24:17 2008 +0000
     1.2 +++ b/xen/arch/x86/mm.c	Mon Nov 03 10:32:02 2008 +0000
     1.3 @@ -1856,7 +1856,8 @@ int get_page(struct page_info *page, str
     1.4          nx = x + 1;
     1.5          d  = nd;
     1.6          if ( unlikely((x & PGC_count_mask) == 0) ||  /* Not allocated? */
     1.7 -             unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
     1.8 +             /* Keep one spare reference to be acquired by get_page_light(). */
     1.9 +             unlikely(((nx + 1) & PGC_count_mask) <= 1) || /* Overflow? */
    1.10               unlikely(d != _domain) )                /* Wrong owner? */
    1.11          {
    1.12              if ( !_shadow_mode_refcounts(domain) && !domain->is_dying )
    1.13 @@ -1878,6 +1879,28 @@ int get_page(struct page_info *page, str
    1.14      return 1;
    1.15  }
    1.16  
    1.17 +/*
    1.18 + * Special version of get_page() to be used exclusively when
    1.19 + * - a page is known to already have a non-zero reference count
    1.20 + * - the page does not need its owner to be checked
    1.21 + * - it will not be called more than once without dropping the thus
    1.22 + *   acquired reference again.
    1.23 + * Due to get_page() reserving one reference, this call cannot fail.
    1.24 + */
    1.25 +static void get_page_light(struct page_info *page)
    1.26 +{
    1.27 +    u32 x, nx, y = page->count_info;
    1.28 +
    1.29 +    do {
    1.30 +        x  = y;
    1.31 +        nx = x + 1;
    1.32 +        BUG_ON(!(x & PGC_count_mask)); /* Not allocated? */
    1.33 +        BUG_ON(!(nx & PGC_count_mask)); /* Overflow? */
    1.34 +        y = cmpxchg(&page->count_info, x, nx);
    1.35 +    }
    1.36 +    while ( unlikely(y != x) );
    1.37 +}
    1.38 +
    1.39  
    1.40  static int alloc_page_type(struct page_info *page, unsigned long type,
    1.41                             int preemptible)
    1.42 @@ -1885,10 +1908,6 @@ static int alloc_page_type(struct page_i
    1.43      struct domain *owner = page_get_owner(page);
    1.44      int rc;
    1.45  
    1.46 -    /* Obtain an extra reference to retain if we set PGT_partial. */
    1.47 -    if ( preemptible && !get_page(page, owner) )
    1.48 -        return -EINVAL;
    1.49 -
    1.50      /* A page table is dirtied when its type count becomes non-zero. */
    1.51      if ( likely(owner != NULL) )
    1.52          paging_mark_dirty(owner, page_to_mfn(page));
    1.53 @@ -1922,14 +1941,10 @@ static int alloc_page_type(struct page_i
    1.54      wmb();
    1.55      if ( rc == -EAGAIN )
    1.56      {
    1.57 +        get_page_light(page);
    1.58          page->u.inuse.type_info |= PGT_partial;
    1.59 -        return -EAGAIN;
    1.60      }
    1.61 -
    1.62 -    if ( preemptible )
    1.63 -        put_page(page);
    1.64 -
    1.65 -    if ( rc == -EINTR )
    1.66 +    else if ( rc == -EINTR )
    1.67      {
    1.68          ASSERT((page->u.inuse.type_info &
    1.69                  (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
    1.70 @@ -2044,8 +2059,8 @@ static int __put_final_page_type(
    1.71      }
    1.72      else if ( rc == -EINTR )
    1.73      {
    1.74 -        ASSERT(!(page->u.inuse.type_info &
    1.75 -                 (PGT_count_mask|PGT_validated|PGT_partial)));
    1.76 +        ASSERT((page->u.inuse.type_info &
    1.77 +                (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
    1.78          if ( !(shadow_mode_enabled(page_get_owner(page)) &&
    1.79                 (page->count_info & PGC_page_table)) )
    1.80              page->tlbflush_timestamp = tlbflush_current_time();
    1.81 @@ -2056,14 +2071,10 @@ static int __put_final_page_type(
    1.82      {
    1.83          BUG_ON(rc != -EAGAIN);
    1.84          wmb();
    1.85 +        get_page_light(page);
    1.86          page->u.inuse.type_info |= PGT_partial;
    1.87 -        /* Must skip put_page() below. */
    1.88 -        preemptible = 0;
    1.89      }
    1.90  
    1.91 -    if ( preemptible )
    1.92 -        put_page(page);
    1.93 -
    1.94      return rc;
    1.95  }
    1.96  
    1.97 @@ -2072,10 +2083,7 @@ static int __put_page_type(struct page_i
    1.98                             int preemptible)
    1.99  {
   1.100      unsigned long nx, x, y = page->u.inuse.type_info;
   1.101 -
   1.102 -    /* Obtain an extra reference to retain if we set PGT_partial. */
   1.103 -    if ( preemptible && !get_page(page, page_get_owner(page)) )
   1.104 -        return -EINVAL;
   1.105 +    int rc = 0;
   1.106  
   1.107      for ( ; ; )
   1.108      {
   1.109 @@ -2098,10 +2106,11 @@ static int __put_page_type(struct page_i
   1.110                  if ( unlikely((y = cmpxchg(&page->u.inuse.type_info,
   1.111                                             x, nx)) != x) )
   1.112                      continue;
   1.113 +                /* We cleared the 'valid bit' so we do the clean up. */
   1.114 +                rc = __put_final_page_type(page, x, preemptible);
   1.115                  if ( x & PGT_partial )
   1.116                      put_page(page);
   1.117 -                /* We cleared the 'valid bit' so we do the clean up. */
   1.118 -                return __put_final_page_type(page, x, preemptible);
   1.119 +                break;
   1.120              }
   1.121  
   1.122              /*
   1.123 @@ -2120,17 +2129,10 @@ static int __put_page_type(struct page_i
   1.124              break;
   1.125  
   1.126          if ( preemptible && hypercall_preempt_check() )
   1.127 -        {
   1.128 -            if ( preemptible )
   1.129 -                put_page(page);
   1.130              return -EINTR;
   1.131 -        }
   1.132      }
   1.133  
   1.134 -    if ( preemptible )
   1.135 -        put_page(page);
   1.136 -
   1.137 -    return 0;
   1.138 +    return rc;
   1.139  }
   1.140  
   1.141  
   1.142 @@ -2138,6 +2140,7 @@ static int __get_page_type(struct page_i
   1.143                             int preemptible)
   1.144  {
   1.145      unsigned long nx, x, y = page->u.inuse.type_info;
   1.146 +    int rc = 0;
   1.147  
   1.148      ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
   1.149  
   1.150 @@ -2233,11 +2236,7 @@ static int __get_page_type(struct page_i
   1.151          }
   1.152  
   1.153          if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
   1.154 -        {
   1.155 -            if ( (x & PGT_partial) && !(nx & PGT_partial) )
   1.156 -                put_page(page);
   1.157              break;
   1.158 -        }
   1.159  
   1.160          if ( preemptible && hypercall_preempt_check() )
   1.161              return -EINTR;
   1.162 @@ -2264,10 +2263,13 @@ static int __get_page_type(struct page_i
   1.163              page->nr_validated_ptes = 0;
   1.164              page->partial_pte = 0;
   1.165          }
   1.166 -        return alloc_page_type(page, type, preemptible);
   1.167 +        rc = alloc_page_type(page, type, preemptible);
   1.168      }
   1.169  
   1.170 -    return 0;
   1.171 +    if ( (x & PGT_partial) && !(nx & PGT_partial) )
   1.172 +        put_page(page);
   1.173 +
   1.174 +    return rc;
   1.175  }
   1.176  
   1.177  void put_page_type(struct page_info *page)