ia64/xen-unstable

changeset 2344:22afa9c3be28

bitkeeper revision 1.1159.51.2 (412cb2dfaIDYjySJYYMTByGbcM77UA)

More grant-table code, and some related sundry improvements.
author      kaf24@labyrinth.cl.cam.ac.uk
date        Wed Aug 25 15:40:15 2004 +0000 (2004-08-25)
parents     36ed4e3fa897
children    bd470dc06d31
files       xen/arch/x86/domain.c xen/arch/x86/memory.c xen/arch/x86/setup.c xen/common/domain.c xen/common/grant_table.c xen/common/kernel.c xen/common/page_alloc.c xen/include/asm-x86/atomic.h xen/include/asm-x86/mm.h xen/include/asm-x86/smp.h xen/include/asm-x86/system.h xen/include/xen/grant_table.h xen/include/xen/sched.h
line diff
     1.1 --- a/xen/arch/x86/domain.c	Wed Aug 25 15:38:29 2004 +0000
     1.2 +++ b/xen/arch/x86/domain.c	Wed Aug 25 15:40:15 2004 +0000
     1.3 @@ -668,9 +668,9 @@ int construct_dom0(struct domain *p,
     1.4            mfn++ )
     1.5      {
     1.6          page = &frame_table[mfn];
     1.7 -        page->u.inuse.domain        = p;
     1.8 +        page->u.inuse.domain     = p;
     1.9          page->u.inuse.type_info  = 0;
    1.10 -        page->u.inuse.count_info = PGC_allocated | 1;
    1.11 +        page->u.inuse.count_info = PGC_always_set | PGC_allocated | 1;
    1.12          list_add_tail(&page->list, &p->page_list);
    1.13          p->tot_pages++; p->max_pages++;
    1.14      }
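
The PGC_always_set bit being ORed in here is defined in the asm-x86/mm.h hunk later in this changeset. A minimal sketch of the resulting count_info layout (the bit values are from this changeset; the claim that the remaining PGC_ flags sit above bit 29 is an assumption):

    /* count_info layout after this changeset:
     *   bits above 29 : remaining PGC_ flags (assumed)
     *   bit  29       : PGC_allocated  -- cleared when the guest frees the page
     *   bit  28       : PGC_always_set -- set at init/alloc, never cleared
     *   bits 27..0    : reference count (PGC_count_mask)
     */
    #define _PGC_allocated   29
    #define PGC_allocated    (1 << _PGC_allocated)
    #define _PGC_always_set  28
    #define PGC_always_set   (1 << _PGC_always_set)
    #define PGC_count_mask   ((1 << 28) - 1)

Because PGC_always_set is never cleared, the 64-bit {domain, count_info} pair in struct pfn_info can never read as all-zero, which the lock-free snapshot in the memory.c hunk below depends on.
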
     2.1 --- a/xen/arch/x86/memory.c	Wed Aug 25 15:38:29 2004 +0000
     2.2 +++ b/xen/arch/x86/memory.c	Wed Aug 25 15:40:15 2004 +0000
     2.3 @@ -153,6 +153,9 @@ void arch_init_memory(void)
     2.4      vm_assist_info[VMASST_TYPE_writable_pagetables].disable =
     2.5          ptwr_disable;
     2.6  
     2.7 +    for ( mfn = 0; mfn < max_page; mfn++ )
     2.8 +        frame_table[mfn].u.inuse.count_info |= PGC_always_set;
     2.9 +
    2.10      /* Initialise to a magic of 0x55555555 so easier to spot bugs later. */
    2.11      memset(machine_to_phys_mapping, 0x55, 4<<20);
    2.12  
    2.13 @@ -179,9 +182,9 @@ void arch_init_memory(void)
    2.14            mfn < virt_to_phys(&machine_to_phys_mapping[1<<20])>>PAGE_SHIFT;
    2.15            mfn++ )
    2.16      {
    2.17 -        frame_table[mfn].u.inuse.count_info = 1 | PGC_allocated;
    2.18 -        frame_table[mfn].u.inuse.type_info  = 1 | PGT_gdt_page; /* non-RW */
    2.19 -        frame_table[mfn].u.inuse.domain     = dom_xen;
    2.20 +        frame_table[mfn].u.inuse.count_info |= PGC_allocated | 1;
    2.21 +        frame_table[mfn].u.inuse.type_info   = PGT_gdt_page | 1; /* non-RW */
    2.22 +        frame_table[mfn].u.inuse.domain      = dom_xen;
    2.23      }
    2.24  }
    2.25  
    2.26 @@ -370,6 +373,7 @@ get_page_from_l1e(
    2.27  {
    2.28      unsigned long l1v = l1_pgentry_val(l1e);
    2.29      unsigned long pfn = l1_pgentry_to_pagenr(l1e);
    2.30 +    struct pfn_info *page = &frame_table[pfn];
    2.31      extern int domain_iomem_in_pfn(struct domain *d, unsigned long pfn);
    2.32  
    2.33      if ( !(l1v & _PAGE_PRESENT) )
    2.34 @@ -383,6 +387,8 @@ get_page_from_l1e(
    2.35  
    2.36      if ( unlikely(!pfn_is_ram(pfn)) )
    2.37      {
    2.38 +        /* SPECIAL CASE 1. Mapping an I/O page. */
    2.39 +
    2.40          /* Revert to caller privileges if FD == DOMID_IO. */
    2.41          if ( d == dom_io )
    2.42              d = current;
    2.43 @@ -397,17 +403,41 @@ get_page_from_l1e(
    2.44          return 0;
    2.45      }
    2.46  
    2.47 +    if ( unlikely(!get_page_from_pagenr(pfn, d)) )
    2.48 +    {
    2.49 +        /* SPECIAL CASE 2. Mapping a foreign page via a grant table. */
    2.50 +        
    2.51 +        int rc;
    2.52 +        struct domain *e;
    2.53 +        u32 count_info;
    2.54 +        /*
    2.55 +         * Yuk! Amazingly this is the simplest way to get a guaranteed atomic
    2.56 +         * snapshot of a 64-bit value on IA32. x86/64 solves this of course!
    2.57 +         * Basically it's a no-op CMPXCHG, to get us the current contents.
    2.58 +         * No need for LOCK prefix -- we know that count_info is never zero
    2.59 +         * because it contains PGC_always_set.
    2.60 +         */
    2.61 +        __asm__ __volatile__(
    2.62 +            "cmpxchg8b %2"
    2.63 +            : "=a" (e), "=d" (count_info),
    2.64 +              "=m" (*(volatile u64 *)(&page->u.inuse.domain))
    2.65 +            : "0" (0), "1" (0), "b" (0), "c" (0) );
    2.66 +        if ( unlikely((count_info & PGC_count_mask) == 0) ||
    2.67 +             unlikely(e == NULL) || unlikely(!get_domain(e)) )
    2.68 +             return 0;
    2.69 +        rc = gnttab_try_map(e, d, page, l1v & _PAGE_RW);
    2.70 +        put_domain(e);
    2.71 +        return rc;
    2.72 +    }
    2.73 +
    2.74      if ( l1v & _PAGE_RW )
    2.75      {
    2.76 -        if ( unlikely(!get_page_and_type_from_pagenr(
    2.77 -            pfn, PGT_writable_page, d)) )
    2.78 +        if ( unlikely(!get_page_type(page, PGT_writable_page)) )
    2.79              return 0;
    2.80 -        set_bit(_PGC_tlb_flush_on_type_change, 
    2.81 -                &frame_table[pfn].u.inuse.count_info);
    2.82 -        return 1;
    2.83 +        set_bit(_PGC_tlb_flush_on_type_change, &page->u.inuse.count_info);
    2.84      }
    2.85  
    2.86 -    return get_page_from_pagenr(pfn, d);
    2.87 +    return 1;
    2.88  }
    2.89  
    2.90  
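The no-op CMPXCHG8B above is the heart of this change, so it is worth spelling out. A standalone sketch, assuming IA32, GCC inline assembly, and the u32/u64 typedefs from the Xen headers (the helper name is hypothetical; the real code inlines the asm directly):

    /* Atomically read an aligned 64-bit value -- here the adjacent
     * {domain pointer, count_info} words of a struct pfn_info.
     * CMPXCHG8B compares EDX:EAX with the memory operand and, on a
     * mismatch, loads the current value into EDX:EAX instead of storing
     * ECX:EBX. We pass 0 in EDX:EAX, and the pair can never equal zero
     * because count_info always contains PGC_always_set, so the compare
     * always fails and the instruction degenerates into an atomic 64-bit
     * load. With no store to make atomic, the LOCK prefix is unneeded. */
    static inline u64 read_u64_atomic(volatile u64 *p)
    {
        u32 lo = 0, hi = 0;
        __asm__ __volatile__(
            "cmpxchg8b %2"
            : "=a" (lo), "=d" (hi), "+m" (*p)
            : "0" (0), "1" (0), "b" (0), "c" (0) );
        return ((u64)hi << 32) | lo;
    }

On x86/64 none of this is needed: an aligned 64-bit MOV is already atomic there, as the comment notes.
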
    2.91 @@ -434,14 +464,33 @@ get_page_from_l2e(
    2.92  }
    2.93  
    2.94  
    2.95 -static void put_page_from_l1e(l1_pgentry_t l1e)
    2.96 +static void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
    2.97  {
    2.98      struct pfn_info *page = &frame_table[l1_pgentry_to_pagenr(l1e)];
    2.99      unsigned long    l1v  = l1_pgentry_val(l1e);
   2.100 +    struct domain   *e = page->u.inuse.domain;
   2.101  
   2.102      if ( !(l1v & _PAGE_PRESENT) || !pfn_is_ram(l1v >> PAGE_SHIFT) )
   2.103          return;
   2.104  
   2.105 +    if ( unlikely(e != d) )
   2.106 +    {
   2.107 +        /*
   2.108 +         * Unmap a foreign page that may have been mapped via a grant table.
   2.109 +         * Note that this can fail for a privileged domain that can map foreign
   2.110 +         * pages via MMUEXT_SET_FOREIGNDOM. Such domains can have some mappings
   2.111 +         * counted via a grant entry and some counted directly in the page
   2.112 +         * structure's reference count. Note that reference counts won't get
   2.113 +         * dangerously confused as long as we always try to decrement the
   2.114 +         * grant entry first. We may end up with a mismatch between which
   2.115 +         * mappings and which unmappings are counted via the grant entry, but
   2.116 +         * really it doesn't matter as privileged domains have carte blanche.
   2.117 +         */
   2.118 +        if ( likely(gnttab_try_unmap(e, d, page, l1v & _PAGE_RW)) )
   2.119 +            return;
   2.120 +        /* Assume this mapping was made via MMUEXT_SET_FOREIGNDOM... */
   2.121 +    }
   2.122 +
   2.123      if ( l1v & _PAGE_RW )
   2.124      {
   2.125          put_page_and_type(page);
   2.126 @@ -452,7 +501,7 @@ static void put_page_from_l1e(l1_pgentry
   2.127          if ( unlikely(((page->u.inuse.type_info & PGT_type_mask) == 
   2.128                         PGT_ldt_page)) &&
   2.129               unlikely(((page->u.inuse.type_info & PGT_count_mask) != 0)) )
   2.130 -            invalidate_shadow_ldt(page->u.inuse.domain);
   2.131 +            invalidate_shadow_ldt(e);
   2.132          put_page(page);
   2.133      }
   2.134  }
   2.135 @@ -527,7 +576,7 @@ static int alloc_l1_table(struct pfn_inf
   2.136  
   2.137   fail:
   2.138      while ( i-- > 0 )
   2.139 -        put_page_from_l1e(pl1e[i]);
   2.140 +        put_page_from_l1e(pl1e[i], d);
   2.141  
   2.142      unmap_domain_mem(pl1e);
   2.143      return 0;
   2.144 @@ -551,6 +600,7 @@ static void free_l2_table(struct pfn_inf
   2.145  
   2.146  static void free_l1_table(struct pfn_info *page)
   2.147  {
   2.148 +    struct domain *d = page->u.inuse.domain;
   2.149      unsigned long page_nr = page - frame_table;
   2.150      l1_pgentry_t *pl1e;
   2.151      int i;
   2.152 @@ -558,7 +608,7 @@ static void free_l1_table(struct pfn_inf
   2.153      pl1e = map_domain_mem(page_nr << PAGE_SHIFT);
   2.154  
   2.155      for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
   2.156 -        put_page_from_l1e(pl1e[i]);
   2.157 +        put_page_from_l1e(pl1e[i], d);
   2.158  
   2.159      unmap_domain_mem(pl1e);
   2.160  }
   2.161 @@ -651,6 +701,7 @@ static int mod_l1_entry(l1_pgentry_t *pl
   2.162  {
   2.163      l1_pgentry_t ol1e;
   2.164      unsigned long _ol1e;
   2.165 +    struct domain *d = current;
   2.166  
   2.167      if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) )
   2.168      {
   2.169 @@ -671,18 +722,18 @@ static int mod_l1_entry(l1_pgentry_t *pl
   2.170          
   2.171          if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
   2.172          {
   2.173 -            put_page_from_l1e(nl1e);
   2.174 +            put_page_from_l1e(nl1e, d);
   2.175              return 0;
   2.176          }
   2.177          
   2.178 -        put_page_from_l1e(ol1e);
   2.179 +        put_page_from_l1e(ol1e, d);
   2.180          return 1;
   2.181      }
   2.182  
   2.183      if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
   2.184          return 0;
   2.185      
   2.186 -    put_page_from_l1e(ol1e);
   2.187 +    put_page_from_l1e(ol1e, d);
   2.188      return 1;
   2.189  }
   2.190  
   2.191 @@ -1289,20 +1340,10 @@ int do_update_va_mapping_otherdomain(uns
   2.192  }
   2.193  
   2.194  
   2.195 -static inline int readonly_page_from_l1e(l1_pgentry_t l1e)
   2.196 -{
   2.197 -    struct pfn_info *page = &frame_table[l1_pgentry_to_pagenr(l1e)];
   2.198 -    unsigned long    l1v  = l1_pgentry_val(l1e);
   2.199  
   2.200 -    if ( (l1v & _PAGE_RW) || !(l1v & _PAGE_PRESENT) ||
   2.201 -         !pfn_is_ram(l1v >> PAGE_SHIFT) )
   2.202 -        return 0;
   2.203 -    put_page_type(page);
   2.204 -    return 1;
   2.205 -}
   2.206 -
   2.207 -
   2.208 -/* Writable Pagetables */
   2.209 +/*************************
   2.210 + * Writable Pagetables
   2.211 + */
   2.212  
   2.213  ptwr_info_t ptwr_info[NR_CPUS] =
   2.214      { [ 0 ... NR_CPUS-1 ] =
   2.215 @@ -1365,13 +1406,8 @@ void ptwr_reconnect_disconnected(unsigne
   2.216          nl1e = pl1e[i];
   2.217          if (likely(l1_pgentry_val(nl1e) == l1_pgentry_val(ol1e)))
   2.218              continue;
   2.219 -        if (likely((l1_pgentry_val(nl1e) ^ l1_pgentry_val(ol1e)) ==
   2.220 -                   _PAGE_RW)) {
   2.221 -            if (likely(readonly_page_from_l1e(nl1e)))
   2.222 -                continue;
   2.223 -        }
   2.224          if (unlikely(l1_pgentry_val(ol1e) & _PAGE_PRESENT))
   2.225 -            put_page_from_l1e(ol1e);
   2.226 +            put_page_from_l1e(ol1e, current);
   2.227          if (unlikely(!get_page_from_l1e(nl1e, current)))
   2.228              BUG();
   2.229      }
   2.230 @@ -1438,7 +1474,7 @@ void ptwr_flush_inactive(void)
   2.231              if (likely(l1_pgentry_val(ol1e) == l1_pgentry_val(nl1e)))
   2.232                  continue;
   2.233              if (unlikely(l1_pgentry_val(ol1e) & _PAGE_PRESENT))
   2.234 -                put_page_from_l1e(ol1e);
   2.235 +                put_page_from_l1e(ol1e, current);
   2.236              if (unlikely(!get_page_from_l1e(nl1e, current)))
   2.237                  BUG();
   2.238          }
     3.1 --- a/xen/arch/x86/setup.c	Wed Aug 25 15:38:29 2004 +0000
     3.2 +++ b/xen/arch/x86/setup.c	Wed Aug 25 15:40:15 2004 +0000
     3.3 @@ -411,7 +411,7 @@ void __init start_of_day(void)
     3.4      clear_bit(smp_processor_id(), &wait_init_idle);
     3.5      smp_threads_ready = 1;
     3.6      smp_commence(); /* Tell other CPUs that state of the world is stable. */
     3.7 -    while (wait_init_idle) 
     3.8 +    while ( wait_init_idle != 0 )
     3.9      {
    3.10          cpu_relax();
    3.11          barrier();
     4.1 --- a/xen/common/domain.c	Wed Aug 25 15:38:29 2004 +0000
     4.2 +++ b/xen/common/domain.c	Wed Aug 25 15:40:15 2004 +0000
     4.3 @@ -232,12 +232,16 @@ void domain_destruct(struct domain *d)
     4.4  {
     4.5      struct domain **pd;
     4.6      unsigned long flags;
     4.7 +    atomic_t      old, new;
     4.8  
     4.9      if ( !test_bit(DF_DYING, &d->flags) )
    4.10          BUG();
    4.11  
    4.12      /* May be already destructed, or get_domain() can race us. */
    4.13 -    if ( cmpxchg(&d->refcnt.counter, 0, DOMAIN_DESTRUCTED) != 0 )
    4.14 +    _atomic_set(old, 0);
    4.15 +    _atomic_set(new, DOMAIN_DESTRUCTED);
    4.16 +    old = atomic_compareandswap(old, new, &d->refcnt);
    4.17 +    if ( _atomic_read(old) != 0 )
    4.18          return;
    4.19  
    4.20      DPRINTK("Releasing task %u\n", d->domain);
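
Spelled out, the point of switching to atomic_compareandswap (added to asm-x86/atomic.h below): of the two transitions that can race on a dying domain's refcnt, exactly one may win. A hypothetical condensation of the logic above:

    /* Returns 1 if we successfully claimed the domain for destruction.
     * The destructor tries 0 -> DOMAIN_DESTRUCTED while a racing
     * get_domain() tries 0 -> 1; the CMPXCHGs on &d->refcnt serialise in
     * hardware, so only one sees 0. If get_domain() wins, we observe a
     * non-zero count and back off; if we win, get_domain()'s retry loop
     * sees DOMAIN_DESTRUCTED (a bit far above any real count) and fails. */
    static int try_mark_destructed(struct domain *d)
    {
        atomic_t zero, dead;
        _atomic_set(zero, 0);
        _atomic_set(dead, DOMAIN_DESTRUCTED);
        return _atomic_read(atomic_compareandswap(zero, dead, &d->refcnt)) == 0;
    }
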
     5.1 --- a/xen/common/grant_table.c	Wed Aug 25 15:38:29 2004 +0000
     5.2 +++ b/xen/common/grant_table.c	Wed Aug 25 15:40:15 2004 +0000
     5.3 @@ -24,6 +24,13 @@
     5.4  #include <xen/config.h>
     5.5  #include <xen/sched.h>
     5.6  
     5.7 +#define PIN_FAIL(_rc, _f, _a...)   \
     5.8 +    do {                           \
     5.9 +        DPRINTK( _f, ## _a );      \
    5.10 +        rc = -(_rc);               \
    5.11 +        goto out;                  \
    5.12 +    } while ( 0 )
    5.13 +
    5.14  static inline void
    5.15  check_tlb_flush(
    5.16      active_grant_entry_t *a)
    5.17 @@ -70,6 +77,7 @@ gnttab_update_pin_status(
    5.18      active_grant_entry_t *act;
    5.19      grant_entry_t *sha;
    5.20      long           rc = 0;
    5.21 +    unsigned long  frame;
    5.22  
    5.23      ld = current;
    5.24  
    5.25 @@ -93,8 +101,11 @@ gnttab_update_pin_status(
    5.26          return -EINVAL;
    5.27      }
    5.28  
    5.29 -    if ( unlikely((rd = find_domain_by_id(dom)) == NULL) )
    5.30 +    if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
    5.31 +         unlikely(ld == rd) )
    5.32      {
    5.33 +        if ( rd != NULL )
    5.34 +            put_domain(rd);
    5.35          DPRINTK("Could not find domain %d\n", dom);
    5.36          return -ESRCH;
    5.37      }
    5.38 @@ -102,6 +113,8 @@ gnttab_update_pin_status(
    5.39      act = &rd->grant_table->active[ref];
    5.40      sha = &rd->grant_table->shared[ref];
    5.41  
    5.42 +    spin_lock(&rd->grant_table->lock);
    5.43 +
    5.44      if ( act->status == 0 )
    5.45      {
    5.46          if ( unlikely(pin_flags == 0) )
    5.47 @@ -118,23 +131,17 @@ gnttab_update_pin_status(
    5.48  
    5.49              if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) ||
    5.50                   unlikely(sdom != ld->domain) )
    5.51 -            {
    5.52 -                DPRINTK("Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
    5.53 +                PIN_FAIL(EINVAL,
    5.54 +                         "Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
    5.55                          sflags, sdom, ld->domain);
    5.56 -                rc = -EINVAL;
    5.57 -                goto out;
    5.58 -            }
    5.59  
    5.60              sflags |= GTF_reading;
    5.61              if ( !(pin_flags & GNTPIN_readonly) )
    5.62              {
    5.63                  sflags |= GTF_writing;
    5.64                  if ( unlikely(sflags & GTF_readonly) )
    5.65 -                {
    5.66 -                    DPRINTK("Attempt to write-pin a read-only grant entry.\n");
    5.67 -                    rc = -EINVAL;
    5.68 -                    goto out;
    5.69 -                }
    5.70 +                    PIN_FAIL(EINVAL,
    5.71 +                             "Attempt to write-pin a r/o grant entry.\n");
    5.72              }
    5.73  
    5.74              /* Merge two 16-bit values into a 32-bit combined update. */
    5.75 @@ -144,11 +151,8 @@ gnttab_update_pin_status(
    5.76              /* NB. prev_sflags is updated in place to seen value. */
    5.77              if ( unlikely(cmpxchg_user((u32 *)&sha->flags, prev_scombo, 
    5.78                                         prev_scombo | GTF_writing)) )
    5.79 -            {
    5.80 -                DPRINTK("Fault while modifying shared flags and domid.\n");
    5.81 -                rc = -EINVAL;
    5.82 -                goto out;
    5.83 -            }
    5.84 +                PIN_FAIL(EINVAL,
    5.85 +                         "Fault while modifying shared flags and domid.\n");
    5.86  
    5.87              /* Did the combined update work (did we see what we expected?). */
    5.88              if ( prev_scombo == scombo )
    5.89 @@ -161,10 +165,22 @@ gnttab_update_pin_status(
    5.90          }
    5.91  
    5.92          /* rmb(); */ /* not on x86 */
    5.93 +        frame = sha->frame;
    5.94 +        if ( unlikely(!pfn_is_ram(frame)) || 
    5.95 +             unlikely(!((pin_flags & GNTPIN_readonly) ? 
    5.96 +                        get_page(&frame_table[frame], rd) : 
    5.97 +                        get_page_and_type(&frame_table[frame], rd, 
    5.98 +                                          PGT_writable_page))) )
    5.99 +        {
   5.100 +            clear_bit(_GTF_writing, &sha->flags);
   5.101 +            clear_bit(_GTF_reading, &sha->flags);
   5.102 +            PIN_FAIL(EINVAL, 
   5.103 +                     "Could not pin the granted frame!\n");
   5.104 +        }
   5.105  
   5.106          act->status = pin_flags;
   5.107          act->domid  = sdom;
   5.108 -        act->frame  = sha->frame;
   5.109 +        act->frame  = frame;
   5.110  
   5.111          make_entry_mappable(rd->grant_table, act);
   5.112      }
   5.113 @@ -174,11 +190,13 @@ gnttab_update_pin_status(
   5.114  
   5.115          if ( unlikely((act->status & 
   5.116                         (GNTPIN_wmap_mask|GNTPIN_rmap_mask)) != 0) )
   5.117 -        {
   5.118 -            DPRINTK("Attempt to deactivate a mapped g.e. (%x)\n", act->status);
   5.119 -            rc = -EINVAL;
   5.120 -            goto out;
   5.121 -        }
   5.122 +            PIN_FAIL(EINVAL,
    5.123 +                     "Attempt to deactivate a mapped g.e. (%x)\n", act->status);
   5.124 +
   5.125 +        frame = act->frame;
   5.126 +        if ( !(act->status & GNTPIN_readonly) )
   5.127 +            put_page_type(&frame_table[frame]);
   5.128 +        put_page(&frame_table[frame]);
   5.129  
   5.130          act->status = 0;
   5.131          make_entry_unmappable(rd->grant_table, act);
   5.132 @@ -199,12 +217,9 @@ gnttab_update_pin_status(
   5.133               (unlikely((act->status & GNTPIN_wmap_mask) != 0) ||
   5.134                (((pin_flags & GNTPIN_host_accessible) == 0) &&
   5.135                 unlikely((act->status & GNTPIN_rmap_mask) != 0))) )
   5.136 -        {
   5.137 -            DPRINTK("Attempt to reduce pinning of a mapped g.e. (%x,%x)\n",
   5.138 +            PIN_FAIL(EINVAL,
   5.139 +                     "Attempt to reduce pinning of a mapped g.e. (%x,%x)\n",
   5.140                      pin_flags, act->status);
   5.141 -            rc = -EINVAL;
   5.142 -            goto out;
   5.143 -        }
   5.144  
   5.145          /* Check for changes to host accessibility. */
   5.146          if ( pin_flags & GNTPIN_host_accessible )
   5.147 @@ -220,6 +235,7 @@ gnttab_update_pin_status(
   5.148          {
   5.149              if ( !(act->status & GNTPIN_readonly) )
   5.150              {
   5.151 +                put_page_type(&frame_table[act->frame]);
   5.152                  check_tlb_flush(act);
   5.153                  clear_bit(_GTF_writing, &sha->flags);
   5.154              }
   5.155 @@ -231,20 +247,19 @@ gnttab_update_pin_status(
   5.156                  prev_sflags = sflags;
   5.157  
   5.158                  if ( unlikely(prev_sflags & GTF_readonly) )
   5.159 -                {
   5.160 -                    DPRINTK("Attempt to write-pin a read-only grant entry.\n");
   5.161 -                    rc = -EINVAL;
   5.162 -                    goto out;
   5.163 -                }
   5.164 -                
   5.165 +                    PIN_FAIL(EINVAL,
   5.166 +                             "Attempt to write-pin a r/o grant entry.\n");
   5.167 +
   5.168 +                if ( unlikely(!get_page_type(&frame_table[act->frame],
   5.169 +                                             PGT_writable_page)) )
   5.170 +                    PIN_FAIL(EINVAL,
    5.171 +                             "Attempt to write-pin an unwritable page.\n");
   5.172 +
   5.173                  /* NB. prev_sflags is updated in place to seen value. */
   5.174                  if ( unlikely(cmpxchg_user(&sha->flags, prev_sflags, 
   5.175                                             prev_sflags | GTF_writing)) )
   5.176 -                {
   5.177 -                    DPRINTK("Fault while modifying shared flags.\n");
   5.178 -                    rc = -EINVAL;
   5.179 -                    goto out;
   5.180 -                }
   5.181 +                    PIN_FAIL(EINVAL,
   5.182 +                             "Fault while modifying shared flags.\n");
   5.183              }
   5.184              while ( prev_sflags != sflags );
   5.185          }
   5.186 @@ -261,6 +276,7 @@ gnttab_update_pin_status(
   5.187      (void)__put_user(act->frame, &uop->host_phys_addr);
   5.188  
   5.189   out:
   5.190 +    spin_unlock(&rd->grant_table->lock);
   5.191      put_domain(rd);
   5.192      return rc;
   5.193  }
   5.194 @@ -289,6 +305,20 @@ do_grant_table_op(
   5.195      return rc;
   5.196  }
   5.197  
   5.198 +int
   5.199 +gnttab_try_map(
   5.200 +    struct domain *rd, struct domain *ld, struct pfn_info *page, int readonly)
   5.201 +{
   5.202 +    return 0;
   5.203 +}
   5.204 +
   5.205 +int
   5.206 +gnttab_try_unmap(
   5.207 +    struct domain *rd, struct domain *ld, struct pfn_info *page, int readonly)
   5.208 +{
   5.209 +    return 0;
   5.210 +}
   5.211 +
   5.212  int 
   5.213  grant_table_create(
   5.214      struct domain *d)
   5.215 @@ -318,6 +348,7 @@ grant_table_create(
   5.216      SHARE_PFN_WITH_DOMAIN(virt_to_page(t->shared), d);
   5.217  
   5.218      /* Okay, install the structure. */
   5.219 +    wmb(); /* avoid races with lock-free access to d->grant_table */
   5.220      d->grant_table = t;
   5.221      return 0;
   5.222  
     6.1 --- a/xen/common/kernel.c	Wed Aug 25 15:38:29 2004 +0000
     6.2 +++ b/xen/common/kernel.c	Wed Aug 25 15:40:15 2004 +0000
     6.3 @@ -296,9 +296,19 @@ void cmain(multiboot_info_t *mbi)
     6.4      xmem_cache_init();
     6.5      xmem_cache_sizes_init(max_page);
     6.6  
     6.7 +    /*
     6.8 +     * Create a domain-structure allocator. The SLAB_NO_REAP flag is essential!
     6.9 +     * This is because in some situations a domain's reference count will be
    6.10 +     * incremented by someone with no other handle on the structure -- this is 
    6.11 +     * inherently racey because the struct could be freed by the time that the
    6.12 +     * count is incremented. By specifying 'no-reap' we ensure that, worst
    6.13 +     * case, they increment some other domain's count, rather than corrupting
    6.14 +     * a random field in a random structure!
    6.15 +     * See, for example, arch/x86/memory.c:get_page_from_l1e().
    6.16 +     */
    6.17      domain_struct_cachep = xmem_cache_create(
    6.18          "domain_cache", sizeof(struct domain),
    6.19 -        0, SLAB_HWCACHE_ALIGN, NULL, NULL);
    6.20 +        0, SLAB_HWCACHE_ALIGN | SLAB_NO_REAP, NULL, NULL);
    6.21      if ( domain_struct_cachep == NULL )
    6.22          panic("No slab cache for task structs.");
    6.23  
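The "someone with no other handle on the structure" is the lock-free owner snapshot in the memory.c hunk earlier in this changeset. A condensed sketch of the hazard (simplified from get_page_from_l1e; not literal patch code):

    struct domain *e = page->u.inuse.domain; /* snapshot, no lock held */
    /* ... e's domain may be destructed and freed right here ... */
    if ( (e != NULL) && get_domain(e) )      /* increments e->refcnt   */
        put_domain(e);                       /* (do work, then drop)   */

If the slab page holding *e could be reaped and handed to another allocator, that increment would scribble on unrelated memory. With SLAB_NO_REAP the memory can only ever be recycled as another struct domain, so the worst case is a spurious count on some other domain -- untidy but harmless, as the comment argues, where corrupting a random structure would not be.
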
     7.1 --- a/xen/common/page_alloc.c	Wed Aug 25 15:38:29 2004 +0000
     7.2 +++ b/xen/common/page_alloc.c	Wed Aug 25 15:40:15 2004 +0000
     7.3 @@ -300,12 +300,21 @@ void init_xenheap_pages(unsigned long ps
     7.4  unsigned long alloc_xenheap_pages(int order)
     7.5  {
     7.6      struct pfn_info *pg;
     7.7 -    int attempts = 0;
     7.8 +    int i, attempts = 0;
     7.9  
    7.10   retry:
    7.11      if ( unlikely((pg = alloc_heap_pages(MEMZONE_XEN, order)) == NULL) )
    7.12          goto no_memory;
    7.13 +
    7.14      memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT));
    7.15 +
    7.16 +    for ( i = 0; i < (1 << order); i++ )
    7.17 +    {
    7.18 +        pg[i].u.inuse.count_info = PGC_always_set;
    7.19 +        pg[i].u.inuse.domain     = NULL;
    7.20 +        pg[i].u.inuse.type_info  = 0;
    7.21 +    }
    7.22 +
    7.23      return (unsigned long)page_to_virt(pg);
    7.24  
    7.25   no_memory:
    7.26 @@ -343,7 +352,7 @@ struct pfn_info *alloc_domheap_pages(str
    7.27  {
    7.28      struct pfn_info *pg;
    7.29      unsigned long mask, flushed_mask, pfn_stamp, cpu_stamp;
    7.30 -    int i;
    7.31 +    int i, j;
    7.32  
    7.33      ASSERT(!in_irq());
    7.34  
    7.35 @@ -353,19 +362,16 @@ struct pfn_info *alloc_domheap_pages(str
    7.36      flushed_mask = 0;
    7.37      for ( i = 0; i < (1 << order); i++ )
    7.38      {
    7.39 -        pg[i].u.inuse.domain    = NULL;
    7.40 -        pg[i].u.inuse.type_info = 0;
    7.41 -
    7.42          if ( (mask = (pg[i].u.free.cpu_mask & ~flushed_mask)) != 0 )
    7.43          {
    7.44              pfn_stamp = pg[i].tlbflush_timestamp;
    7.45 -            for ( i = 0; (mask != 0) && (i < smp_num_cpus); i++ )
    7.46 +            for ( j = 0; (mask != 0) && (j < smp_num_cpus); j++ )
    7.47              {
    7.48 -                if ( mask & (1<<i) )
    7.49 +                if ( mask & (1<<j) )
    7.50                  {
    7.51 -                    cpu_stamp = tlbflush_time[i];
    7.52 +                    cpu_stamp = tlbflush_time[j];
    7.53                      if ( !NEED_FLUSH(cpu_stamp, pfn_stamp) )
    7.54 -                        mask &= ~(1<<i);
    7.55 +                        mask &= ~(1<<j);
    7.56                  }
    7.57              }
    7.58              
    7.59 @@ -376,6 +382,10 @@ struct pfn_info *alloc_domheap_pages(str
    7.60                  flushed_mask |= mask;
    7.61              }
    7.62          }
    7.63 +
    7.64 +        pg[i].u.inuse.count_info = PGC_always_set;
    7.65 +        pg[i].u.inuse.domain     = NULL;
    7.66 +        pg[i].u.inuse.type_info  = 0;
    7.67      }
    7.68  
    7.69      if ( d == NULL )
    7.70 @@ -401,7 +411,7 @@ struct pfn_info *alloc_domheap_pages(str
    7.71      {
    7.72          pg[i].u.inuse.domain = d;
    7.73          wmb(); /* Domain pointer must be visible before updating refcnt. */
    7.74 -        pg[i].u.inuse.count_info = PGC_allocated | 1;
    7.75 +        pg[i].u.inuse.count_info |= PGC_allocated | 1;
    7.76          list_add_tail(&pg[i].list, &d->page_list);
    7.77      }
    7.78  
    7.79 @@ -418,10 +428,13 @@ void free_domheap_pages(struct pfn_info 
    7.80      if ( unlikely(IS_XEN_HEAP_FRAME(pg)) )
    7.81      {
    7.82          spin_lock_recursive(&d->page_alloc_lock);
    7.83 +
    7.84          for ( i = 0; i < (1 << order); i++ )
    7.85              list_del(&pg[i].list);
    7.86 +
    7.87          d->xenheap_pages -= 1 << order;
    7.88          drop_dom_ref = (d->xenheap_pages == 0);
    7.89 +
    7.90          spin_unlock_recursive(&d->page_alloc_lock);
    7.91      }
    7.92      else if ( likely(d != NULL) )
    7.93 @@ -431,9 +444,8 @@ void free_domheap_pages(struct pfn_info 
    7.94  
    7.95          for ( i = 0; i < (1 << order); i++ )
    7.96          {
    7.97 -            pg[i].tlbflush_timestamp = tlbflush_clock;
    7.98 -            pg[i].u.inuse.count_info = 0;
    7.99 -            pg[i].u.free.cpu_mask    = 1 << d->processor;
   7.100 +            pg[i].tlbflush_timestamp  = tlbflush_clock;
   7.101 +            pg[i].u.free.cpu_mask     = 1 << d->processor;
   7.102              list_del(&pg[i].list);
   7.103          }
   7.104  
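Note the deleted write of zero to count_info in the free path above: zeroing would strip PGC_always_set and re-open a window in which the CMPXCHG8B snapshot could legitimately read a zero pair. Stated as a hypothetical assertion (compare the ASSERT added to SHARE_PFN_WITH_DOMAIN in asm-x86/mm.h below; it assumes u.free.cpu_mask does not overlay the count word in the union):

    /* Intended invariant from arch_init_memory() onwards, for every
     * frame, whether free or allocated: */
    ASSERT(frame_table[mfn].u.inuse.count_info & PGC_always_set);
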
     8.1 --- a/xen/include/asm-x86/atomic.h	Wed Aug 25 15:38:29 2004 +0000
     8.2 +++ b/xen/include/asm-x86/atomic.h	Wed Aug 25 15:40:15 2004 +0000
     8.3 @@ -2,11 +2,7 @@
     8.4  #define __ARCH_X86_ATOMIC__
     8.5  
     8.6  #include <xen/config.h>
     8.7 -
     8.8 -/*
     8.9 - * Atomic operations that C can't guarantee us.  Useful for
    8.10 - * resource counting etc..
    8.11 - */
    8.12 +#include <asm/system.h>
    8.13  
    8.14  #ifdef CONFIG_SMP
    8.15  #define LOCK "lock ; "
    8.16 @@ -15,11 +11,11 @@
    8.17  #endif
    8.18  
    8.19  /*
    8.20 - * Make sure gcc doesn't try to be clever and move things around
    8.21 - * on us. We need to use _exactly_ the address the user gave us,
    8.22 - * not some alias that contains the same information.
    8.23 + * NB. I've pushed the volatile qualifier into the operations. This allows
    8.24 + * fast accessors such as _atomic_read() and _atomic_set() which don't give
    8.25 + * the compiler a fit.
    8.26   */
    8.27 -typedef struct { volatile int counter; } atomic_t;
    8.28 +typedef struct { int counter; } atomic_t;
    8.29  
    8.30  #define ATOMIC_INIT(i)	{ (i) }
    8.31  
    8.32 @@ -29,8 +25,9 @@ typedef struct { volatile int counter; }
    8.33   * 
    8.34   * Atomically reads the value of @v.  Note that the guaranteed
    8.35   * useful range of an atomic_t is only 24 bits.
    8.36 - */ 
    8.37 -#define atomic_read(v)		((v)->counter)
    8.38 + */
    8.39 +#define _atomic_read(v)		((v).counter)
    8.40 +#define atomic_read(v)		(*(volatile int *)&((v)->counter))
    8.41  
    8.42  /**
    8.43   * atomic_set - set atomic variable
    8.44 @@ -40,7 +37,8 @@ typedef struct { volatile int counter; }
    8.45   * Atomically sets the value of @v to @i.  Note that the guaranteed
    8.46   * useful range of an atomic_t is only 24 bits.
    8.47   */ 
    8.48 -#define atomic_set(v,i)		(((v)->counter) = (i))
    8.49 +#define _atomic_set(v,i)	(((v).counter) = (i))
    8.50 +#define atomic_set(v,i)		(*(volatile int *)&((v)->counter) = (i))
    8.51  
    8.52  /**
    8.53   * atomic_add - add integer to atomic variable
    8.54 @@ -54,8 +52,8 @@ static __inline__ void atomic_add(int i,
    8.55  {
    8.56  	__asm__ __volatile__(
    8.57  		LOCK "addl %1,%0"
    8.58 -		:"=m" (v->counter)
    8.59 -		:"ir" (i), "m" (v->counter));
    8.60 +		:"=m" (*(volatile int *)&v->counter)
    8.61 +		:"ir" (i), "m" (*(volatile int *)&v->counter));
    8.62  }
    8.63  
    8.64  /**
    8.65 @@ -70,8 +68,8 @@ static __inline__ void atomic_sub(int i,
    8.66  {
    8.67  	__asm__ __volatile__(
    8.68  		LOCK "subl %1,%0"
    8.69 -		:"=m" (v->counter)
    8.70 -		:"ir" (i), "m" (v->counter));
    8.71 +		:"=m" (*(volatile int *)&v->counter)
    8.72 +		:"ir" (i), "m" (*(volatile int *)&v->counter));
    8.73  }
    8.74  
    8.75  /**
    8.76 @@ -90,8 +88,8 @@ static __inline__ int atomic_sub_and_tes
    8.77  
    8.78  	__asm__ __volatile__(
    8.79  		LOCK "subl %2,%0; sete %1"
    8.80 -		:"=m" (v->counter), "=qm" (c)
    8.81 -		:"ir" (i), "m" (v->counter) : "memory");
    8.82 +		:"=m" (*(volatile int *)&v->counter), "=qm" (c)
    8.83 +		:"ir" (i), "m" (*(volatile int *)&v->counter) : "memory");
    8.84  	return c;
    8.85  }
    8.86  
    8.87 @@ -106,8 +104,8 @@ static __inline__ void atomic_inc(atomic
    8.88  {
    8.89  	__asm__ __volatile__(
    8.90  		LOCK "incl %0"
    8.91 -		:"=m" (v->counter)
    8.92 -		:"m" (v->counter));
    8.93 +		:"=m" (*(volatile int *)&v->counter)
    8.94 +		:"m" (*(volatile int *)&v->counter));
    8.95  }
    8.96  
    8.97  /**
    8.98 @@ -121,8 +119,8 @@ static __inline__ void atomic_dec(atomic
    8.99  {
   8.100  	__asm__ __volatile__(
   8.101  		LOCK "decl %0"
   8.102 -		:"=m" (v->counter)
   8.103 -		:"m" (v->counter));
   8.104 +		:"=m" (*(volatile int *)&v->counter)
   8.105 +		:"m" (*(volatile int *)&v->counter));
   8.106  }
   8.107  
   8.108  /**
   8.109 @@ -140,8 +138,8 @@ static __inline__ int atomic_dec_and_tes
   8.110  
   8.111  	__asm__ __volatile__(
   8.112  		LOCK "decl %0; sete %1"
   8.113 -		:"=m" (v->counter), "=qm" (c)
   8.114 -		:"m" (v->counter) : "memory");
   8.115 +		:"=m" (*(volatile int *)&v->counter), "=qm" (c)
   8.116 +		:"m" (*(volatile int *)&v->counter) : "memory");
   8.117  	return c != 0;
   8.118  }
   8.119  
   8.120 @@ -160,8 +158,8 @@ static __inline__ int atomic_inc_and_tes
   8.121  
   8.122  	__asm__ __volatile__(
   8.123  		LOCK "incl %0; sete %1"
   8.124 -		:"=m" (v->counter), "=qm" (c)
   8.125 -		:"m" (v->counter) : "memory");
   8.126 +		:"=m" (*(volatile int *)&v->counter), "=qm" (c)
   8.127 +		:"m" (*(volatile int *)&v->counter) : "memory");
   8.128  	return c != 0;
   8.129  }
   8.130  
   8.131 @@ -181,11 +179,20 @@ static __inline__ int atomic_add_negativ
   8.132  
   8.133  	__asm__ __volatile__(
   8.134  		LOCK "addl %2,%0; sets %1"
   8.135 -		:"=m" (v->counter), "=qm" (c)
   8.136 -		:"ir" (i), "m" (v->counter) : "memory");
   8.137 +		:"=m" (*(volatile int *)&v->counter), "=qm" (c)
   8.138 +		:"ir" (i), "m" (*(volatile int *)&v->counter) : "memory");
   8.139  	return c;
   8.140  }
   8.141  
   8.142 +static __inline__ atomic_t atomic_compareandswap(
   8.143 +	atomic_t old, atomic_t new, atomic_t *v)
   8.144 +{
   8.145 +	atomic_t rc;
   8.146 +	rc.counter = 
   8.147 +		__cmpxchg(&v->counter, old.counter, new.counter, sizeof(int));
   8.148 +	return rc;
   8.149 +}
   8.150 +
   8.151  /* Atomic operations are already serializing on x86 */
   8.152  #define smp_mb__before_atomic_dec()	barrier()
   8.153  #define smp_mb__after_atomic_dec()	barrier()
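
Dropping volatile from the typedef is what makes atomic_t copyable as a plain struct, which in turn makes compare-and-swap retry loops pleasant to write with the _atomic_read()/_atomic_set() accessors. A hypothetical helper in that style (not part of this changeset; get_domain() in sched.h below is the real user of the idiom):

    /* Atomically increment *v unless it has reached 'limit'.
     * Returns 1 on success, 0 if the limit was already hit. */
    static inline int atomic_inc_below(atomic_t *v, int limit)
    {
        atomic_t old, new, seen = *v;          /* plain struct copies */
        do {
            old = seen;
            if ( _atomic_read(old) >= limit )
                return 0;
            _atomic_set(new, _atomic_read(old) + 1);
            seen = atomic_compareandswap(old, new, v);
        } while ( _atomic_read(seen) != _atomic_read(old) );
        return 1;
    }
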
     9.1 --- a/xen/include/asm-x86/mm.h	Wed Aug 25 15:38:29 2004 +0000
     9.2 +++ b/xen/include/asm-x86/mm.h	Wed Aug 25 15:40:15 2004 +0000
     9.3 @@ -87,9 +87,11 @@ struct pfn_info
     9.4   /* Cleared when the owning guest 'frees' this page. */
     9.5  #define _PGC_allocated                29
     9.6  #define PGC_allocated                 (1<<_PGC_allocated)
     9.7 - /* 28-bit count of references to this frame. */
     9.8 -#define PGC_count_mask                ((1<<29)-1)
     9.9 -
    9.10 + /* This bit is always set, guaranteeing that the count word is never zero. */
    9.11 +#define _PGC_always_set               28
    9.12 +#define PGC_always_set                (1<<_PGC_always_set)
    9.13 + /* 27-bit count of references to this frame. */
    9.14 +#define PGC_count_mask                ((1<<28)-1)
    9.15  
    9.16  /* We trust the slab allocator in slab.c, and our use of it. */
    9.17  #define PageSlab(page)		(1)
    9.18 @@ -106,7 +108,8 @@ struct pfn_info
    9.19          wmb(); /* install valid domain ptr before updating refcnt. */       \
    9.20          spin_lock(&(_dom)->page_alloc_lock);                                \
    9.21          /* _dom holds an allocation reference */                            \
    9.22 -        (_pfn)->u.inuse.count_info = PGC_allocated | 1;                     \
    9.23 +        ASSERT((_pfn)->u.inuse.count_info == PGC_always_set);               \
    9.24 +        (_pfn)->u.inuse.count_info |= PGC_allocated | 1;                    \
    9.25          if ( unlikely((_dom)->xenheap_pages++ == 0) )                       \
    9.26              get_knownalive_domain(_dom);                                    \
    9.27          list_add_tail(&(_pfn)->list, &(_dom)->xenpage_list);                \
    9.28 @@ -150,10 +153,8 @@ static inline int get_page(struct pfn_in
    9.29               unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
    9.30               unlikely(p != domain) )                 /* Wrong owner? */
    9.31          {
    9.32 -            DPRINTK("Error pfn %08lx: ed=%p(%u), sd=%p(%u),"
    9.33 -                    " caf=%08x, taf=%08x\n",
    9.34 -                    page_to_pfn(page), domain, domain->domain,
    9.35 -                    p, (p && !((x & PGC_count_mask) == 0))?p->domain:999, 
    9.36 +            DPRINTK("Error pfn %08lx: ed=%p, sd=%p, caf=%08x, taf=%08x\n",
    9.37 +                    page_to_pfn(page), domain, p,
    9.38                      x, page->u.inuse.type_info);
    9.39              return 0;
    9.40          }
    9.41 @@ -364,26 +365,21 @@ void ptwr_reconnect_disconnected(unsigne
    9.42  void ptwr_flush_inactive(void);
    9.43  int ptwr_do_page_fault(unsigned long);
    9.44  
    9.45 -static always_inline void 
    9.46 -__cleanup_writable_pagetable(
    9.47 -    const int what)
    9.48 -{
    9.49 -    int cpu = smp_processor_id();
    9.50 +#define __cleanup_writable_pagetable(_what)                               \
    9.51 +do {                                                                      \
    9.52 +    int cpu = smp_processor_id();                                         \
    9.53 +    if ((_what) & PTWR_CLEANUP_ACTIVE)                                    \
    9.54 +        if (ptwr_info[cpu].disconnected != ENTRIES_PER_L2_PAGETABLE)      \
    9.55 +            ptwr_reconnect_disconnected(0L);                              \
    9.56 +    if ((_what) & PTWR_CLEANUP_INACTIVE)                                  \
    9.57 +        if (ptwr_info[cpu].writable_idx)                                  \
    9.58 +            ptwr_flush_inactive();                                        \
    9.59 +} while ( 0 )
    9.60  
    9.61 -    if (what & PTWR_CLEANUP_ACTIVE)
    9.62 -        if (ptwr_info[cpu].disconnected != ENTRIES_PER_L2_PAGETABLE)
    9.63 -            ptwr_reconnect_disconnected(0L);
    9.64 -    if (what & PTWR_CLEANUP_INACTIVE)
    9.65 -        if (ptwr_info[cpu].writable_idx)
    9.66 -            ptwr_flush_inactive();
    9.67 -}
    9.68 -
    9.69 -static always_inline void
    9.70 -cleanup_writable_pagetable(
    9.71 -    struct domain *d, const int what)
    9.72 -{
    9.73 -    if ( unlikely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
    9.74 -        __cleanup_writable_pagetable(what);
    9.75 -}
    9.76 +#define cleanup_writable_pagetable(_d, _w)                                \
    9.77 +    do {                                                                  \
    9.78 +        if ( unlikely(VM_ASSIST((_d), VMASST_TYPE_writable_pagetables)) ) \
    9.79 +        __cleanup_writable_pagetable(_w);                                 \
    9.80 +    } while ( 0 )
    9.81  
    9.82  #endif /* __ASM_X86_MM_H__ */
    10.1 --- a/xen/include/asm-x86/smp.h	Wed Aug 25 15:38:29 2004 +0000
    10.2 +++ b/xen/include/asm-x86/smp.h	Wed Aug 25 15:40:15 2004 +0000
    10.3 @@ -1,26 +1,13 @@
    10.4  #ifndef __ASM_SMP_H
    10.5  #define __ASM_SMP_H
    10.6  
    10.7 -/*
    10.8 - * We need the APIC definitions automatically as part of 'smp.h'
    10.9 - */
   10.10  #ifndef __ASSEMBLY__
   10.11  #include <xen/config.h>
   10.12 -/*#include <xen/threads.h>*/
   10.13 -#include <asm/ptrace.h>
   10.14 -#endif
   10.15 -
   10.16 -#ifdef CONFIG_X86_LOCAL_APIC
   10.17 -#ifndef __ASSEMBLY__
   10.18  #include <asm/fixmap.h>
   10.19 -#include <asm/bitops.h>
   10.20  #include <asm/mpspec.h>
   10.21 -#ifdef CONFIG_X86_IO_APIC
   10.22  #include <asm/io_apic.h>
   10.23 -#endif
   10.24  #include <asm/apic.h>
   10.25  #endif
   10.26 -#endif
   10.27  
   10.28  #ifdef CONFIG_SMP
   10.29  #ifndef __ASSEMBLY__
   10.30 @@ -37,12 +24,6 @@ extern int pic_mode;
   10.31  extern int smp_num_siblings;
   10.32  extern int cpu_sibling_map[];
   10.33  
   10.34 -extern void smp_flush_tlb(void);
   10.35 -extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
   10.36 -extern void smp_send_reschedule(int cpu);
   10.37 -extern void smp_invalidate_rcv(void);		/* Process an NMI */
   10.38 -extern void (*mtrr_hook) (void);
   10.39 -
   10.40  /*
   10.41   * On x86 all CPUs are mapped 1:1 to the APIC space.
   10.42   * This simplifies scheduling and IPI sending and
    11.1 --- a/xen/include/asm-x86/system.h	Wed Aug 25 15:38:29 2004 +0000
    11.2 +++ b/xen/include/asm-x86/system.h	Wed Aug 25 15:40:15 2004 +0000
    11.3 @@ -30,33 +30,33 @@ static always_inline unsigned long __xch
    11.4  		case 1:
    11.5  			__asm__ __volatile__("xchgb %b0,%1"
    11.6  				:"=q" (x)
    11.7 -				:"m" (*__xg(ptr)), "0" (x)
    11.8 +				:"m" (*__xg((volatile void *)ptr)), "0" (x)
    11.9  				:"memory");
   11.10  			break;
   11.11  		case 2:
   11.12  			__asm__ __volatile__("xchgw %w0,%1"
   11.13  				:"=r" (x)
   11.14 -				:"m" (*__xg(ptr)), "0" (x)
   11.15 +				:"m" (*__xg((volatile void *)ptr)), "0" (x)
   11.16  				:"memory");
   11.17  			break;
   11.18  #if defined(__i386__)
   11.19  		case 4:
   11.20  			__asm__ __volatile__("xchgl %0,%1"
   11.21  				:"=r" (x)
   11.22 -				:"m" (*__xg(ptr)), "0" (x)
   11.23 +				:"m" (*__xg((volatile void *)ptr)), "0" (x)
   11.24  				:"memory");
   11.25  			break;
   11.26  #elif defined(__x86_64__)
   11.27  		case 4:
   11.28  			__asm__ __volatile__("xchgl %k0,%1"
   11.29  				:"=r" (x)
   11.30 -				:"m" (*__xg(ptr)), "0" (x)
   11.31 +				:"m" (*__xg((volatile void *)ptr)), "0" (x)
   11.32  				:"memory");
   11.33  			break;
   11.34  		case 8:
   11.35  			__asm__ __volatile__("xchgq %0,%1"
   11.36  				:"=r" (x)
   11.37 -				:"m" (*__xg(ptr)), "0" (x)
   11.38 +				:"m" (*__xg((volatile void *)ptr)), "0" (x)
   11.39  				:"memory");
   11.40  			break;
   11.41  #endif
   11.42 @@ -78,33 +78,33 @@ static always_inline unsigned long __cmp
   11.43  	case 1:
   11.44  		__asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
   11.45  				     : "=a"(prev)
   11.46 -				     : "q"(new), "m"(*__xg(ptr)), "0"(old)
   11.47 +				     : "q"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
   11.48  				     : "memory");
   11.49  		return prev;
   11.50  	case 2:
   11.51  		__asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
   11.52  				     : "=a"(prev)
   11.53 -				     : "q"(new), "m"(*__xg(ptr)), "0"(old)
   11.54 +				     : "r"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
   11.55  				     : "memory");
   11.56  		return prev;
   11.57  #if defined(__i386__)
   11.58  	case 4:
   11.59  		__asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
   11.60  				     : "=a"(prev)
   11.61 -				     : "q"(new), "m"(*__xg(ptr)), "0"(old)
   11.62 +				     : "r"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
   11.63  				     : "memory");
   11.64  		return prev;
   11.65  #elif defined(__x86_64__)
   11.66  	case 4:
   11.67  		__asm__ __volatile__(LOCK_PREFIX "cmpxchgl %k1,%2"
   11.68  				     : "=a"(prev)
   11.69 -				     : "q"(new), "m"(*__xg(ptr)), "0"(old)
   11.70 +				     : "r"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
   11.71  				     : "memory");
   11.72  		return prev;
   11.73  	case 8:
   11.74  		__asm__ __volatile__(LOCK_PREFIX "cmpxchgq %1,%2"
   11.75  				     : "=a"(prev)
   11.76 -				     : "q"(new), "m"(*__xg(ptr)), "0"(old)
   11.77 +				     : "r"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
   11.78  				     : "memory");
   11.79  		return prev;
   11.80  #endif
    12.1 --- a/xen/include/xen/grant_table.h	Wed Aug 25 15:38:29 2004 +0000
    12.2 +++ b/xen/include/xen/grant_table.h	Wed Aug 25 15:40:15 2004 +0000
    12.3 @@ -24,6 +24,8 @@
    12.4  #ifndef __XEN_GRANT_H__
    12.5  #define __XEN_GRANT_H__
    12.6  
    12.7 +#include <xen/config.h>
    12.8 +#include <xen/mm.h>
    12.9  #include <hypervisor-ifs/grant_table.h>
   12.10  
   12.11  /* Active grant entry - used for shadowing GTF_permit_access grants. */
   12.12 @@ -65,10 +67,19 @@ typedef struct {
   12.13  } grant_table_t;
   12.14  
   12.15  /* Start-of-day system initialisation. */
   12.16 -void grant_table_init(void);
   12.17 +void grant_table_init(
   12.18 +    void);
   12.19  
   12.20  /* Create/destroy per-domain grant table context. */
   12.21 -int  grant_table_create(struct domain *d);
   12.22 -void grant_table_destroy(struct domain *d);
   12.23 +int grant_table_create(
   12.24 +    struct domain *d);
   12.25 +void grant_table_destroy(
   12.26 +    struct domain *d);
   12.27 +
   12.28 +/* Create/destroy host-CPU mappings via a grant-table entry. */
   12.29 +int gnttab_try_map(
   12.30 +    struct domain *rd, struct domain *ld, struct pfn_info *page, int readonly);
   12.31 +int gnttab_try_unmap(
   12.32 +    struct domain *rd, struct domain *ld, struct pfn_info *page, int readonly);
   12.33  
   12.34  #endif /* __XEN_GRANT_H__ */
    13.1 --- a/xen/include/xen/sched.h	Wed Aug 25 15:38:29 2004 +0000
    13.2 +++ b/xen/include/xen/sched.h	Wed Aug 25 15:40:15 2004 +0000
    13.3 @@ -1,6 +1,9 @@
    13.4  #ifndef __SCHED_H__
    13.5  #define __SCHED_H__
    13.6  
    13.7 +#define STACK_SIZE (2*PAGE_SIZE)
    13.8 +#define MAX_DOMAIN_NAME 16
    13.9 +
   13.10  #include <xen/config.h>
   13.11  #include <xen/types.h>
   13.12  #include <xen/spinlock.h>
   13.13 @@ -10,23 +13,18 @@
   13.14  #include <asm/processor.h>
   13.15  #include <hypervisor-ifs/hypervisor-if.h>
   13.16  #include <hypervisor-ifs/dom0_ops.h>
   13.17 -#include <xen/grant_table.h>
   13.18  #include <xen/list.h>
   13.19  #include <xen/time.h>
   13.20  #include <xen/ac_timer.h>
   13.21  #include <xen/delay.h>
   13.22  #include <asm/atomic.h>
   13.23 -
   13.24 -#define STACK_SIZE (2*PAGE_SIZE)
   13.25  #include <asm/current.h>
   13.26 -
   13.27 -#define MAX_DOMAIN_NAME 16
   13.28 +#include <xen/spinlock.h>
   13.29 +#include <xen/grant_table.h>
   13.30  
   13.31  extern unsigned long volatile jiffies;
   13.32  extern rwlock_t tasklist_lock;
   13.33  
   13.34 -#include <xen/spinlock.h>
   13.35 -
   13.36  struct domain;
   13.37  
   13.38  typedef struct event_channel_st
   13.39 @@ -167,10 +165,19 @@ struct domain *alloc_domain_struct();
   13.40   * Use this when you don't have an existing reference to @d. It returns
   13.41   * FALSE if @d is being destructed.
   13.42   */
   13.43 -static inline int get_domain(struct domain *d)
   13.44 +static always_inline int get_domain(struct domain *d)
   13.45  {
   13.46 -    atomic_inc(&d->refcnt);
   13.47 -    return !(atomic_read(&d->refcnt) & DOMAIN_DESTRUCTED);
   13.48 +    atomic_t old, new, seen = d->refcnt;
   13.49 +    do
   13.50 +    {
   13.51 +        old = seen;
   13.52 +        if ( unlikely(_atomic_read(old) & DOMAIN_DESTRUCTED) )
   13.53 +            return 0;
   13.54 +        _atomic_set(new, _atomic_read(old) + 1);
   13.55 +        seen = atomic_compareandswap(old, new, &d->refcnt);
   13.56 +    }
   13.57 +    while ( unlikely(_atomic_read(seen) != _atomic_read(old)) );
   13.58 +    return 1;
   13.59  }
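
A caller-side sketch of the contract (hypothetical snippet; get_page_from_l1e in the memory.c hunk above is the real example):

    struct domain *e = page->u.inuse.domain;  /* lock-free, may be stale */
    if ( (e != NULL) && get_domain(e) )
    {
        /* e cannot complete domain_destruct() until we put_domain():
         * with our count held, its 0 -> DOMAIN_DESTRUCTED cmpxchg
         * can never see zero. */
        use_domain(e);                        /* hypothetical work */
        put_domain(e);
    }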
   13.60  
   13.61  /*