direct-io.hg

changeset 10394:bc76ad9d6270

[IA64] remove some races between the p2m table and the m2p table

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author awilliam@xenbuild.aw
date Fri Jun 09 10:35:42 2006 -0600 (2006-06-09)
parents 6fdafeeb88bb
children 43d9c8042ab6
files xen/arch/ia64/xen/domain.c xen/arch/ia64/xen/mm.c xen/include/asm-ia64/linux-xen/asm/pgalloc.h xen/include/asm-ia64/linux-xen/asm/pgtable.h xen/include/asm-ia64/mm.h
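The race removed here is the SMP window in which another CPU could observe a
newly installed p2m entry (the pte mapping a guest pseudo-physical frame to a
machine frame) before the matching m2p entry written by set_gpfn_from_mfn(),
or observe a freshly installed page-table page before its zero-cleared
contents. The fix writes the m2p entry (and clears the page) first and then
installs the pte with release semantics (set_pte_rel(), ptep_cmpxchg_rel()),
so everything written before the pte store is visible to any CPU that sees
the new pte. The standalone sketch below (not Xen code; all names are
invented for illustration) shows the same publish-with-release /
read-with-acquire pattern using GCC atomic builtins.

    /* Illustration only: publish a toy "pte" with release semantics after
     * writing the m2p entry, and read it back with acquire semantics,
     * mirroring the st8.rel/ld.acq ordering.  Build with: cc -pthread */
    #include <pthread.h>
    #include <stdio.h>

    #define INVALID_M2P_ENTRY (~0UL)

    static unsigned long m2p_entry = INVALID_M2P_ENTRY; /* mfn -> gpfn */
    static unsigned long pte;                            /* 0 == not present */

    static void *writer(void *arg)
    {
        (void)arg;
        m2p_entry = 0x1234;                 /* set_gpfn_from_mfn() analogue */
        /* st8.rel analogue: the store above is visible before the pte. */
        __atomic_store_n(&pte, 0xabcd000UL, __ATOMIC_RELEASE);
        return NULL;
    }

    static void *reader(void *arg)
    {
        (void)arg;
        /* ld.acq analogue: once the pte is seen, the m2p entry is too. */
        while (__atomic_load_n(&pte, __ATOMIC_ACQUIRE) == 0)
            ;
        printf("pte seen, m2p entry = %#lx\n", m2p_entry); /* never INVALID */
        return NULL;
    }

    int main(void)
    {
        pthread_t w, r;
        pthread_create(&r, NULL, reader, NULL);
        pthread_create(&w, NULL, writer, NULL);
        pthread_join(w, NULL);
        pthread_join(r, NULL);
        return 0;
    }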
line diff
     1.1 --- a/xen/arch/ia64/xen/domain.c	Fri Jun 09 10:35:41 2006 -0600
     1.2 +++ b/xen/arch/ia64/xen/domain.c	Fri Jun 09 10:35:42 2006 -0600
     1.3 @@ -472,13 +472,7 @@ static void relinquish_memory(struct dom
     1.4          /* Follow the list chain and /then/ potentially free the page. */
     1.5          ent = ent->next;
     1.6  #ifdef CONFIG_XEN_IA64_DOM0_VP
     1.7 -#if 1
     1.8          BUG_ON(get_gpfn_from_mfn(page_to_mfn(page)) != INVALID_M2P_ENTRY);
     1.9 -#else
    1.10 -        //XXX this should be done at traversing the P2M table.
    1.11 -        if (page_get_owner(page) == d)
    1.12 -            set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY);
    1.13 -#endif
    1.14  #endif
    1.15          put_page(page);
    1.16      }
     2.1 --- a/xen/arch/ia64/xen/mm.c	Fri Jun 09 10:35:41 2006 -0600
     2.2 +++ b/xen/arch/ia64/xen/mm.c	Fri Jun 09 10:35:42 2006 -0600
     2.3 @@ -212,16 +212,6 @@ share_xen_page_with_guest(struct page_in
     2.4  
     2.5      // alloc_xenheap_pages() doesn't initialize page owner.
     2.6      //BUG_ON(page_get_owner(page) != NULL);
     2.7 -#if 0
     2.8 -    if (get_gpfn_from_mfn(page_to_mfn(page)) != INVALID_M2P_ENTRY) {
     2.9 -        printk("%s:%d page 0x%p mfn 0x%lx gpfn 0x%lx\n", __func__, __LINE__,
    2.10 -               page, page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)));
    2.11 -    }
    2.12 -#endif
    2.13 -    // grant_table_destroy() release these pages.
    2.14 -    // but it doesn't clear m2p entry. So there might remain stale entry.
    2.15 -    // We clear such a stale entry here.
    2.16 -    set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY);
    2.17  
    2.18      spin_lock(&d->page_alloc_lock);
    2.19  
    2.20 @@ -240,6 +230,11 @@ share_xen_page_with_guest(struct page_in
    2.21          get_knownalive_domain(d);
    2.22      list_add_tail(&page->list, &d->xenpage_list);
    2.23  
    2.24 +    // grant_table_destroy() releases these pages,
    2.25 +    // but it doesn't clear their m2p entries, so stale entries may
    2.26 +    // remain. Clear any such stale entry here.
    2.27 +    set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY);
    2.28 +
    2.29      spin_unlock(&d->page_alloc_lock);
    2.30  }
    2.31  
    2.32 @@ -351,6 +346,9 @@ unsigned long translate_domain_mpaddr(un
    2.33  }
    2.34  
    2.35  //XXX !xxx_present() should be used instread of !xxx_none()?
    2.36 +// The pud, pmd and pte pages are zero-cleared when they are allocated.
    2.37 +// Their contents must be visible before the tables are populated, so
    2.38 +// the cmpxchg that installs them must have release semantics.
    2.39  static pte_t*
    2.40  lookup_alloc_domain_pte(struct domain* d, unsigned long mpaddr)
    2.41  {
    2.42 @@ -360,19 +358,38 @@ lookup_alloc_domain_pte(struct domain* d
    2.43      pmd_t *pmd;
    2.44  
    2.45      BUG_ON(mm->pgd == NULL);
    2.46 +
    2.47      pgd = pgd_offset(mm, mpaddr);
    2.48 -    if (pgd_none(*pgd)) {
    2.49 -        pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr));
    2.50 + again_pgd:
    2.51 +    if (unlikely(pgd_none(*pgd))) {
    2.52 +        pud_t *old_pud = NULL;
    2.53 +        pud = pud_alloc_one(mm, mpaddr);
    2.54 +        if (unlikely(!pgd_cmpxchg_rel(mm, pgd, old_pud, pud))) {
    2.55 +            pud_free(pud);
    2.56 +            goto again_pgd;
    2.57 +        }
    2.58      }
    2.59  
    2.60      pud = pud_offset(pgd, mpaddr);
    2.61 -    if (pud_none(*pud)) {
    2.62 -        pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr));
    2.63 + again_pud:
    2.64 +    if (unlikely(pud_none(*pud))) {
    2.65 +        pmd_t* old_pmd = NULL;
    2.66 +        pmd = pmd_alloc_one(mm, mpaddr);
    2.67 +        if (unlikely(!pud_cmpxchg_rel(mm, pud, old_pmd, pmd))) {
    2.68 +            pmd_free(pmd);
    2.69 +            goto again_pud;
    2.70 +        }
    2.71      }
    2.72  
    2.73      pmd = pmd_offset(pud, mpaddr);
    2.74 -    if (pmd_none(*pmd)) {
    2.75 -        pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm, mpaddr));
    2.76 + again_pmd:
    2.77 +    if (unlikely(pmd_none(*pmd))) {
    2.78 +        pte_t* old_pte = NULL;
    2.79 +        pte_t* pte = pte_alloc_one_kernel(mm, mpaddr);
    2.80 +        if (unlikely(!pmd_cmpxchg_kernel_rel(mm, pmd, old_pte, pte))) {
    2.81 +            pte_free_kernel(pte);
    2.82 +            goto again_pmd;
    2.83 +        }
    2.84      }
    2.85  
    2.86      return pte_offset_map(pmd, mpaddr);
    2.87 @@ -389,15 +406,15 @@ lookup_noalloc_domain_pte(struct domain*
    2.88  
    2.89      BUG_ON(mm->pgd == NULL);
    2.90      pgd = pgd_offset(mm, mpaddr);
    2.91 -    if (!pgd_present(*pgd))
    2.92 +    if (unlikely(!pgd_present(*pgd)))
    2.93          return NULL;
    2.94  
    2.95      pud = pud_offset(pgd, mpaddr);
    2.96 -    if (!pud_present(*pud))
    2.97 +    if (unlikely(!pud_present(*pud)))
    2.98          return NULL;
    2.99  
   2.100      pmd = pmd_offset(pud, mpaddr);
   2.101 -    if (!pmd_present(*pmd))
   2.102 +    if (unlikely(!pmd_present(*pmd)))
   2.103          return NULL;
   2.104  
   2.105      return pte_offset_map(pmd, mpaddr);
   2.106 @@ -414,15 +431,15 @@ lookup_noalloc_domain_pte_none(struct do
   2.107  
   2.108      BUG_ON(mm->pgd == NULL);
   2.109      pgd = pgd_offset(mm, mpaddr);
   2.110 -    if (pgd_none(*pgd))
   2.111 +    if (unlikely(pgd_none(*pgd)))
   2.112          return NULL;
   2.113  
   2.114      pud = pud_offset(pgd, mpaddr);
   2.115 -    if (pud_none(*pud))
   2.116 +    if (unlikely(pud_none(*pud)))
   2.117          return NULL;
   2.118  
   2.119      pmd = pmd_offset(pud, mpaddr);
   2.120 -    if (pmd_none(*pmd))
   2.121 +    if (unlikely(pmd_none(*pmd)))
   2.122          return NULL;
   2.123  
   2.124      return pte_offset_map(pmd, mpaddr);
   2.125 @@ -565,13 +582,14 @@ struct page_info *
   2.126  
   2.127      ret = get_page(p, d);
   2.128      BUG_ON(ret == 0);
   2.129 -    set_pte(pte, pfn_pte(maddr >> PAGE_SHIFT,
   2.130 -                         __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
   2.131 +    set_gpfn_from_mfn(page_to_mfn(p), mpaddr >> PAGE_SHIFT);
   2.132 +    // clear_page() and set_gpfn_from_mfn() become visible before the
   2.133 +    // pte is installed because set_pte_rel() has release semantics.
   2.134 +    set_pte_rel(pte,
   2.135 +                pfn_pte(maddr >> PAGE_SHIFT,
   2.136 +                        __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
   2.137  
   2.138 -    mb ();
   2.139 -    //XXX CONFIG_XEN_IA64_DOM0_VP
   2.140 -    //    TODO racy
   2.141 -    set_gpfn_from_mfn(page_to_mfn(p), mpaddr >> PAGE_SHIFT);
   2.142 +    smp_mb();
   2.143      return p;
   2.144  }
   2.145  
   2.146 @@ -626,9 +644,10 @@ void
   2.147  
   2.148      pte = lookup_alloc_domain_pte(d, mpaddr);
   2.149      if (pte_none(*pte)) {
   2.150 -        set_pte(pte, pfn_pte(physaddr >> PAGE_SHIFT,
   2.151 -                             __pgprot(__DIRTY_BITS | _PAGE_PL_2 | arflags)));
   2.152 -        mb ();
   2.153 +        set_pte_rel(pte,
   2.154 +                    pfn_pte(physaddr >> PAGE_SHIFT,
   2.155 +                            __pgprot(__DIRTY_BITS | _PAGE_PL_2 | arflags)));
   2.156 +        smp_mb();
   2.157      } else
   2.158          printk("%s: mpaddr %lx already mapped!\n", __func__, mpaddr);
   2.159  }
   2.160 @@ -644,11 +663,10 @@ assign_domain_page(struct domain *d,
   2.161      BUG_ON((physaddr & GPFN_IO_MASK) != GPFN_MEM);
   2.162      ret = get_page(page, d);
   2.163      BUG_ON(ret == 0);
   2.164 +    set_gpfn_from_mfn(physaddr >> PAGE_SHIFT, mpaddr >> PAGE_SHIFT);
   2.165 +    // Because __assign_domain_page() uses set_pte_rel(), which has
   2.166 +    // release semantics, smp_mb() isn't needed here.
   2.167      __assign_domain_page(d, mpaddr, physaddr, ASSIGN_writable);
   2.168 -
   2.169 -    //XXX CONFIG_XEN_IA64_DOM0_VP
   2.170 -    //    TODO racy
   2.171 -    set_gpfn_from_mfn(physaddr >> PAGE_SHIFT, mpaddr >> PAGE_SHIFT);
   2.172  }
   2.173  
   2.174  #ifdef CONFIG_XEN_IA64_DOM0_VP
   2.175 @@ -740,8 +758,10 @@ assign_domain_mach_page(struct domain *d
   2.176      return mpaddr;
   2.177  }
   2.178  
   2.179 -// caller must get_page(mfn_to_page(mfn)) before
   2.180 -// caller must call set_gpfn_from_mfn().
   2.181 +// The caller must get_page(mfn_to_page(mfn)) before calling.
   2.182 +// The caller must call set_gpfn_from_mfn() beforehand if necessary;
   2.183 +// because its result must be visible before the pte exchange, the caller
   2.184 +// must use a memory barrier. NOTE: xchg has acquire semantics.
   2.185  // flags: currently only ASSIGN_readonly
   2.186  static void
   2.187  assign_domain_page_replace(struct domain *d, unsigned long mpaddr,
   2.188 @@ -752,39 +772,95 @@ assign_domain_page_replace(struct domain
   2.189      pte_t old_pte;
   2.190      pte_t npte;
   2.191      unsigned long arflags = (flags & ASSIGN_readonly)? _PAGE_AR_R: _PAGE_AR_RWX;
   2.192 -
   2.193      pte = lookup_alloc_domain_pte(d, mpaddr);
   2.194  
   2.195      // update pte
   2.196      npte = pfn_pte(mfn, __pgprot(__DIRTY_BITS | _PAGE_PL_2 | arflags));
   2.197      old_pte = ptep_xchg(mm, mpaddr, pte, npte);
   2.198      if (pte_mem(old_pte)) {
   2.199 -        unsigned long old_mfn;
   2.200 -        struct page_info* old_page;
   2.201 +        unsigned long old_mfn = pte_pfn(old_pte);
   2.202  
   2.203 -        // XXX should previous underlying page be removed?
   2.204 -        //  or should error be returned because it is a due to a domain?
   2.205 -        old_mfn = pte_pfn(old_pte);//XXX
   2.206 -        old_page = mfn_to_page(old_mfn);
   2.207 +        // The mfn == old_mfn case can happen when a domain maps a granted
   2.208 +        // page twice at the same pseudo-physical address.
   2.209 +        // It makes no sense, but it is allowed.
   2.210 +        // __gnttab_map_grant_ref()
   2.211 +        //   => create_host_mapping()
   2.212 +        //      => assign_domain_page_replace()
   2.213 +        if (mfn != old_mfn) {
   2.214 +            struct page_info* old_page = mfn_to_page(old_mfn);
   2.215  
   2.216 -        if (page_get_owner(old_page) == d) {
   2.217 -            BUG_ON(get_gpfn_from_mfn(old_mfn) != (mpaddr >> PAGE_SHIFT));
   2.218 -            set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY);
   2.219 -        }
   2.220 -
   2.221 -        domain_page_flush(d, mpaddr, old_mfn, mfn);
   2.222 +            if (page_get_owner(old_page) == d) {
   2.223 +                BUG_ON(get_gpfn_from_mfn(old_mfn) != (mpaddr >> PAGE_SHIFT));
   2.224 +                set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY);
   2.225 +            }
   2.226  
   2.227 -        try_to_clear_PGC_allocate(d, old_page);
   2.228 -        put_page(old_page);
   2.229 -    } else {
   2.230 -        BUG_ON(!mfn_valid(mfn));
   2.231 -        BUG_ON(page_get_owner(mfn_to_page(mfn)) == d &&
   2.232 -               get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY);
   2.233 +            domain_page_flush(d, mpaddr, old_mfn, mfn);
   2.234 +
   2.235 +            try_to_clear_PGC_allocate(d, old_page);
   2.236 +            put_page(old_page);
   2.237 +        }
   2.238      }
   2.239  }
   2.240  
   2.241 +// caller must get_page(new_page) before
   2.242 +// Only steal_page_for_grant_transfer() calls this function.
   2.243 +static int
   2.244 +assign_domain_page_cmpxchg_rel(struct domain* d, unsigned long mpaddr,
   2.245 +                               struct page_info* old_page,
   2.246 +                               struct page_info* new_page,
   2.247 +                               unsigned long flags)
   2.248 +{
   2.249 +    struct mm_struct *mm = &d->arch.mm;
   2.250 +    pte_t* pte;
   2.251 +    unsigned long old_mfn;
   2.252 +    unsigned long old_arflags;
   2.253 +    pte_t old_pte;
   2.254 +    unsigned long new_mfn;
   2.255 +    unsigned long new_arflags;
   2.256 +    pte_t new_pte;
   2.257 +    pte_t ret_pte;
   2.258 +
   2.259 +    pte = lookup_alloc_domain_pte(d, mpaddr);
   2.260 +
   2.261 + again:
   2.262 +    old_arflags = pte_val(*pte) & ~_PAGE_PPN_MASK;//XXX
   2.263 +    old_mfn = page_to_mfn(old_page);
   2.264 +    old_pte = pfn_pte(old_mfn, __pgprot(old_arflags));
   2.265 +
   2.266 +    new_arflags = (flags & ASSIGN_readonly)? _PAGE_AR_R: _PAGE_AR_RWX;
   2.267 +    new_mfn = page_to_mfn(new_page);
   2.268 +    new_pte = pfn_pte(new_mfn,
   2.269 +                      __pgprot(__DIRTY_BITS | _PAGE_PL_2 | new_arflags));
   2.270 +
   2.271 +    // update pte
   2.272 +    ret_pte = ptep_cmpxchg_rel(mm, mpaddr, pte, old_pte, new_pte);
   2.273 +    if (unlikely(pte_val(old_pte) != pte_val(ret_pte))) {
   2.274 +        if (pte_pfn(old_pte) == pte_pfn(ret_pte)) {
   2.275 +            goto again;
   2.276 +        }
   2.277 +
   2.278 +        DPRINTK("%s: old_pte 0x%lx old_arflags 0x%lx old_mfn 0x%lx "
   2.279 +                "ret_pte 0x%lx ret_mfn 0x%lx\n",
   2.280 +                __func__,
   2.281 +                pte_val(old_pte), old_arflags, old_mfn,
   2.282 +                pte_val(ret_pte), pte_pfn(ret_pte));
   2.283 +        return -EINVAL;
   2.284 +    }
   2.285 +
   2.286 +    BUG_ON(!pte_mem(old_pte));
   2.287 +    BUG_ON(page_get_owner(old_page) != d);
   2.288 +    BUG_ON(get_gpfn_from_mfn(old_mfn) != (mpaddr >> PAGE_SHIFT));
   2.289 +    BUG_ON(old_mfn == new_mfn);
   2.290 +
   2.291 +    set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY);
   2.292 +
   2.293 +    domain_page_flush(d, mpaddr, old_mfn, new_mfn);
   2.294 +    put_page(old_page);
   2.295 +    return 0;
   2.296 +}
   2.297 +
   2.298  static void
   2.299 -zap_domain_page_one(struct domain *d, unsigned long mpaddr, int do_put_page)
   2.300 +zap_domain_page_one(struct domain *d, unsigned long mpaddr)
   2.301  {
   2.302      struct mm_struct *mm = &d->arch.mm;
   2.303      pte_t *pte;
   2.304 @@ -811,13 +887,10 @@ zap_domain_page_one(struct domain *d, un
   2.305  
   2.306      domain_page_flush(d, mpaddr, mfn, INVALID_MFN);
   2.307  
   2.308 -    if (do_put_page) {
   2.309 -        try_to_clear_PGC_allocate(d, page);
   2.310 -        put_page(page);
   2.311 -    }
   2.312 +    try_to_clear_PGC_allocate(d, page);
   2.313 +    put_page(page);
   2.314  }
   2.315  
   2.316 -//XXX SMP
   2.317  unsigned long
   2.318  dom0vp_zap_physmap(struct domain *d, unsigned long gpfn,
   2.319                     unsigned int extent_order)
   2.320 @@ -827,7 +900,7 @@ dom0vp_zap_physmap(struct domain *d, uns
   2.321          return -ENOSYS;
   2.322      }
   2.323  
   2.324 -    zap_domain_page_one(d, gpfn << PAGE_SHIFT, 1);
   2.325 +    zap_domain_page_one(d, gpfn << PAGE_SHIFT);
   2.326      return 0;
   2.327  }
   2.328  
   2.329 @@ -861,11 +934,13 @@ dom0vp_add_physmap(struct domain* d, uns
   2.330          error = -EINVAL;
   2.331          goto out1;
   2.332      }
   2.333 +    BUG_ON(!mfn_valid(mfn));
   2.334      if (unlikely(get_page(mfn_to_page(mfn), rd) == 0)) {
   2.335          error = -EINVAL;
   2.336          goto out1;
   2.337      }
   2.338 -
   2.339 +    BUG_ON(page_get_owner(mfn_to_page(mfn)) == d &&
   2.340 +           get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY);
   2.341      assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, flags);
   2.342      //don't update p2m table because this page belongs to rd, not d.
   2.343  out1:
   2.344 @@ -891,10 +966,12 @@ create_grant_host_mapping(unsigned long 
   2.345          return GNTST_general_error;
   2.346      }
   2.347  
   2.348 +    BUG_ON(!mfn_valid(mfn));
   2.349      page = mfn_to_page(mfn);
   2.350      ret = get_page(page, page_get_owner(page));
   2.351      BUG_ON(ret == 0);
   2.352 -
   2.353 +    BUG_ON(page_get_owner(mfn_to_page(mfn)) == d &&
   2.354 +           get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY);
   2.355      assign_domain_page_replace(d, gpaddr, mfn, (flags & GNTMAP_readonly)?
   2.356                                                ASSIGN_readonly: ASSIGN_writable);
   2.357      return GNTST_okay;
   2.358 @@ -937,7 +1014,6 @@ destroy_grant_host_mapping(unsigned long
   2.359  }
   2.360  
   2.361  // heavily depends on the struct page layout.
   2.362 -//XXX SMP
   2.363  int
   2.364  steal_page_for_grant_transfer(struct domain *d, struct page_info *page)
   2.365  {
   2.366 @@ -946,11 +1022,41 @@ steal_page_for_grant_transfer(struct dom
   2.367  #endif
   2.368      u32 _d, _nd;
   2.369      u64 x, nx, y;
   2.370 -    unsigned long mpaddr = get_gpfn_from_mfn(page_to_mfn(page)) << PAGE_SHIFT;
   2.371 +    unsigned long gpfn;
   2.372      struct page_info *new;
   2.373 +    unsigned long new_mfn;
   2.374 +    int ret;
   2.375 +    new = alloc_domheap_page(d);
   2.376 +    if (new == NULL) {
   2.377 +        DPRINTK("alloc_domheap_page() failed\n");
   2.378 +        return -1;
   2.379 +    }
   2.380 +    // zero out pages for security reasons
   2.381 +    clear_page(page_to_virt(new));
   2.382 +    // assign_domain_page_cmpxchg_rel() has release semantics
   2.383 +    // so smp_mb() isn't needed.
   2.384 +
   2.385 +    ret = get_page(new, d);
   2.386 +    BUG_ON(ret == 0);
   2.387  
   2.388 -    zap_domain_page_one(d, mpaddr, 0);
   2.389 -    put_page(page);
   2.390 +    gpfn = get_gpfn_from_mfn(page_to_mfn(page));
   2.391 +    if (gpfn == INVALID_M2P_ENTRY) {
   2.392 +        free_domheap_page(new);
   2.393 +        return -1;
   2.394 +    }
   2.395 +    new_mfn = page_to_mfn(new);
   2.396 +    set_gpfn_from_mfn(new_mfn, gpfn);
   2.397 +    // smp_mb() isn't needed because assign_domain_page_cmpxchg_rel()
   2.398 +    // has release semantics.
   2.399 +
   2.400 +    ret = assign_domain_page_cmpxchg_rel(d, gpfn << PAGE_SHIFT, page, new,
   2.401 +                                         ASSIGN_writable);
   2.402 +    if (ret < 0) {
   2.403 +        DPRINTK("assign_domain_page_cmpxchg_rel failed %d\n", ret);
   2.404 +        set_gpfn_from_mfn(new_mfn, INVALID_M2P_ENTRY);
   2.405 +        free_domheap_page(new);
   2.406 +        return -1;
   2.407 +    }
   2.408  
   2.409      spin_lock(&d->page_alloc_lock);
   2.410  
   2.411 @@ -1006,14 +1112,6 @@ steal_page_for_grant_transfer(struct dom
   2.412      list_del(&page->list);
   2.413  
   2.414      spin_unlock(&d->page_alloc_lock);
   2.415 -
   2.416 -#if 1
   2.417 -    //XXX Until net_rx_action() fix
   2.418 -    // assign new page for this mpaddr
   2.419 -    new = assign_new_domain_page(d, mpaddr);
   2.420 -    BUG_ON(new == NULL);//XXX
   2.421 -#endif
   2.422 -
   2.423      return 0;
   2.424  }
   2.425  
   2.426 @@ -1023,10 +1121,14 @@ guest_physmap_add_page(struct domain *d,
   2.427  {
   2.428      int ret;
   2.429  
   2.430 +    BUG_ON(!mfn_valid(mfn));
   2.431      ret = get_page(mfn_to_page(mfn), d);
   2.432      BUG_ON(ret == 0);
   2.433 +    BUG_ON(page_get_owner(mfn_to_page(mfn)) == d &&
   2.434 +           get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY);
   2.435 +    set_gpfn_from_mfn(mfn, gpfn);
   2.436 +    smp_mb();
   2.437      assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, ASSIGN_writable);
   2.438 -    set_gpfn_from_mfn(mfn, gpfn);//XXX SMP
   2.439  
   2.440      //BUG_ON(mfn != ((lookup_domain_mpa(d, gpfn << PAGE_SHIFT) & _PFN_MASK) >> PAGE_SHIFT));
   2.441  }
   2.442 @@ -1036,7 +1138,7 @@ guest_physmap_remove_page(struct domain 
   2.443                            unsigned long mfn)
   2.444  {
   2.445      BUG_ON(mfn == 0);//XXX
   2.446 -    zap_domain_page_one(d, gpfn << PAGE_SHIFT, 1);
   2.447 +    zap_domain_page_one(d, gpfn << PAGE_SHIFT);
   2.448  }
   2.449  
   2.450  //XXX sledgehammer.
   2.451 @@ -1216,7 +1318,7 @@ void put_page_type(struct page_info *pag
   2.452              nx |= PGT_va_mutable;
   2.453          }
   2.454      }
   2.455 -    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
   2.456 +    while ( unlikely((y = cmpxchg_rel(&page->u.inuse.type_info, x, nx)) != x) );
   2.457  }
   2.458  
   2.459  
   2.460 @@ -1324,7 +1426,7 @@ int get_page_type(struct page_info *page
   2.461              }
   2.462          }
   2.463      }
   2.464 -    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
   2.465 +    while ( unlikely((y = cmpxchg_acq(&page->u.inuse.type_info, x, nx)) != x) );
   2.466  
   2.467      if ( unlikely(!(nx & PGT_validated)) )
   2.468      {
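The rewritten lookup_alloc_domain_pte() above populates missing pgd/pud/pmd
levels without taking a lock: each intermediate table is allocated
zero-cleared, published with a release cmpxchg (pgd/pud/pmd_cmpxchg_rel), and
freed again if another CPU won the race. A simplified standalone sketch of
that retry pattern, using GCC atomic builtins and invented names rather than
the Xen helpers:

    /* Sketch only: one "slot" stands in for a pgd/pud/pmd entry. */
    #include <stdlib.h>

    #define TABLE_ENTRIES 512

    static void *slot;          /* NULL == "none", i.e. not yet populated */

    static void *alloc_zeroed_table(void)
    {
        return calloc(TABLE_ENTRIES, sizeof(void *));
    }

    /* Return the next-level table, populating the slot if necessary.
     * A loser of the race frees its fresh table and uses the winner's. */
    static void *lookup_alloc(void)
    {
        void *next = __atomic_load_n(&slot, __ATOMIC_ACQUIRE);

        while (next == NULL) {
            void *fresh = alloc_zeroed_table();
            void *expected = NULL;

            /* Release cmpxchg: the zero-cleared table is visible before
             * the slot is published, as in pud/pmd_cmpxchg_rel(). */
            if (__atomic_compare_exchange_n(&slot, &expected, fresh,
                                            0, __ATOMIC_RELEASE,
                                            __ATOMIC_RELAXED))
                return fresh;

            free(fresh);       /* lost the race */
            next = expected;   /* cmpxchg stored the current value here */
        }
        return next;
    }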
     3.1 --- a/xen/include/asm-ia64/linux-xen/asm/pgalloc.h	Fri Jun 09 10:35:41 2006 -0600
     3.2 +++ b/xen/include/asm-ia64/linux-xen/asm/pgalloc.h	Fri Jun 09 10:35:42 2006 -0600
     3.3 @@ -78,6 +78,15 @@ static inline void pgtable_quicklist_fre
     3.4  }
     3.5  #endif
     3.6  
     3.7 +#ifdef XEN
     3.8 +#include <asm/pgtable.h>
     3.9 +#ifdef __PAGETABLE_PUD_FOLDED
    3.10 +# define pgd_cmpxchg_rel(mm, pgd, old_pud, new_pud)	({(void)old_pud;1;})
    3.11 +#else
    3.12 +# error "implement pgd_cmpxchg_rel()!"
    3.13 +#endif
    3.14 +#endif
    3.15 +
    3.16  static inline pgd_t *pgd_alloc(struct mm_struct *mm)
    3.17  {
    3.18  	return pgtable_quicklist_alloc();
    3.19 @@ -94,6 +103,25 @@ pud_populate(struct mm_struct *mm, pud_t
    3.20  	pud_val(*pud_entry) = __pa(pmd);
    3.21  }
    3.22  
    3.23 +#ifdef XEN
    3.24 +static inline int
    3.25 +pud_cmpxchg_rel(struct mm_struct *mm, pud_t * pud_entry,
    3.26 +		pmd_t * old_pmd, pmd_t * new_pmd)
    3.27 +{
    3.28 +#ifdef CONFIG_SMP
    3.29 +	unsigned long r;
    3.30 +	r = cmpxchg_rel(&pud_val(*pud_entry), __pa(old_pmd), __pa(new_pmd));
    3.31 +	return (r == __pa(old_pmd));
    3.32 +#else
    3.33 +	if (pud_val(*pud_entry) == __pa(old_pmd)) {
    3.34 +		pud_val(*pud_entry) = __pa(new_pmd);
    3.35 +		return 1;
    3.36 +	}
    3.37 +	return 0;
    3.38 +#endif
    3.39 +}
    3.40 +#endif
    3.41 +
    3.42  static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
    3.43  {
    3.44  	return pgtable_quicklist_alloc();
    3.45 @@ -120,6 +148,25 @@ pmd_populate_kernel(struct mm_struct *mm
    3.46  	pmd_val(*pmd_entry) = __pa(pte);
    3.47  }
    3.48  
    3.49 +#ifdef XEN
    3.50 +static inline int
    3.51 +pmd_cmpxchg_kernel_rel(struct mm_struct *mm, pmd_t * pmd_entry,
    3.52 +		       pte_t * old_pte, pte_t * new_pte)
    3.53 +{
    3.54 +#ifdef CONFIG_SMP
    3.55 +	unsigned long r;
    3.56 +	r = cmpxchg_rel(&pmd_val(*pmd_entry), __pa(old_pte), __pa(new_pte));
    3.57 +	return (r == __pa(old_pte));
    3.58 +#else
    3.59 +	if (pmd_val(*pmd_entry) == __pa(old_pte)) {
    3.60 +		pmd_val(*pmd_entry) = __pa(new_pte);
    3.61 +		return 1;
    3.62 +	}
    3.63 +	return 0;
    3.64 +#endif
    3.65 +}
    3.66 +#endif
    3.67 +
    3.68  #ifndef XEN
    3.69  static inline struct page *pte_alloc_one(struct mm_struct *mm,
    3.70  					 unsigned long addr)
     4.1 --- a/xen/include/asm-ia64/linux-xen/asm/pgtable.h	Fri Jun 09 10:35:41 2006 -0600
     4.2 +++ b/xen/include/asm-ia64/linux-xen/asm/pgtable.h	Fri Jun 09 10:35:42 2006 -0600
     4.3 @@ -208,6 +208,19 @@ ia64_phys_addr_valid (unsigned long addr
     4.4   */
     4.5  #define set_pte(ptep, pteval)	(*(ptep) = (pteval))
     4.6  #define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
     4.7 +#ifdef XEN
     4.8 +static inline void
     4.9 +set_pte_rel(pte_t* ptep, pte_t pteval)
    4.10 +{
    4.11 +#ifdef CONFIG_SMP
    4.12 +	asm volatile ("st8.rel [%0]=%1" ::
    4.13 +		      "r"(&pte_val(*ptep)), "r"(pte_val(pteval)) :
    4.14 +		      "memory");
    4.15 +#else
    4.16 +	set_pte(ptep, pteval);
    4.17 +#endif
    4.18 +}
    4.19 +#endif
    4.20  
    4.21  #define RGN_SIZE	(1UL << 61)
    4.22  #define RGN_KERNEL	7
    4.23 @@ -401,6 +414,7 @@ ptep_get_and_clear(struct mm_struct *mm,
    4.24  #endif
    4.25  }
    4.26  
    4.27 +#ifdef XEN
    4.28  static inline pte_t
    4.29  ptep_xchg(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t npte)
    4.30  {
    4.31 @@ -413,6 +427,23 @@ ptep_xchg(struct mm_struct *mm, unsigned
    4.32  #endif
    4.33  }
    4.34  
    4.35 +static inline pte_t
    4.36 +ptep_cmpxchg_rel(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
    4.37 +		 pte_t old_pte, pte_t new_pte)
    4.38 +{
    4.39 +#ifdef CONFIG_SMP
    4.40 +	return __pte(cmpxchg_rel(&pte_val(*ptep),
    4.41 +				 pte_val(old_pte), pte_val(new_pte)));
    4.42 +#else
    4.43 +	pte_t pte = *ptep;
    4.44 +	if (pte_val(pte) == pte_val(old_pte)) {
    4.45 +		set_pte(ptep, new_pte);
    4.46 +	}
    4.47 +	return pte;
    4.48 +#endif
    4.49 +}
    4.50 +#endif
    4.51 +
    4.52  #ifndef XEN
    4.53  static inline void
    4.54  ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
     5.1 --- a/xen/include/asm-ia64/mm.h	Fri Jun 09 10:35:41 2006 -0600
     5.2 +++ b/xen/include/asm-ia64/mm.h	Fri Jun 09 10:35:42 2006 -0600
     5.3 @@ -160,7 +160,7 @@ static inline void put_page(struct page_
     5.4  	x = y;
     5.5  	nx = x - 1;
     5.6      }
     5.7 -    while (unlikely((y = cmpxchg(&page->count_info, x, nx)) != x));
     5.8 +    while (unlikely((y = cmpxchg_rel(&page->count_info, x, nx)) != x));
     5.9  
    5.10      if (unlikely((nx & PGC_count_mask) == 0))
    5.11  	free_domheap_page(page);
    5.12 @@ -186,7 +186,7 @@ static inline int get_page(struct page_i
    5.13  	    return 0;
    5.14  	}
    5.15      }
    5.16 -    while(unlikely((y = cmpxchg((u64*)&page->count_info, x, nx)) != x));
    5.17 +    while(unlikely((y = cmpxchg_acq((u64*)&page->count_info, x, nx)) != x));
    5.18      return 1;
    5.19  }
    5.20
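The asm-ia64/mm.h change above pairs the page reference-count updates with
explicit ordering: get_page() now uses cmpxchg_acq, so accesses to the page
cannot be reordered before the reference is taken, and put_page() uses
cmpxchg_rel, so all prior accesses are visible before the count drops and the
page can be freed. A minimal standalone refcount sketch of the same
acquire/release pairing (illustrative only, not the Xen implementation):

    /* Illustration: acquire on get, release on put, as in the
     * cmpxchg_acq/cmpxchg_rel pairing above.  Names are invented. */
    #include <stdatomic.h>
    #include <stdlib.h>

    struct object {
        atomic_ulong count;
        /* ... payload ... */
    };

    /* Take a reference unless the object is already dying. */
    static int obj_get(struct object *obj)
    {
        unsigned long x = atomic_load_explicit(&obj->count,
                                               memory_order_relaxed);
        do {
            if (x == 0)
                return 0;                      /* already being freed */
        } while (!atomic_compare_exchange_weak_explicit(&obj->count, &x, x + 1,
                                                        memory_order_acquire,
                                                        memory_order_relaxed));
        return 1;
    }

    /* Drop a reference; free the object when the last one goes away. */
    static void obj_put(struct object *obj)
    {
        if (atomic_fetch_sub_explicit(&obj->count, 1,
                                      memory_order_release) == 1) {
            /* Pair the release above with an acquire before reusing
             * or freeing the payload. */
            atomic_thread_fence(memory_order_acquire);
            free(obj);
        }
    }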