ia64/xen-unstable

changeset 10418:ee3d10828937

[XEN] New memory_op XENMEM_exchange. Allows atomic
exchange of one memory reservation for another of the
same size, but with different properties.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@dhcp93.uk.xensource.com
date Fri Jun 16 14:43:54 2006 +0100 (2006-06-16)
parents 231e07e22f9c
children 0d1dab1d9b67 2ac74e1df3d7
files xen/arch/ia64/xen/domain.c xen/arch/x86/domain_build.c xen/arch/x86/mm.c xen/arch/x86/shadow.c xen/arch/x86/shadow_public.c xen/common/grant_table.c xen/common/memory.c xen/common/page_alloc.c xen/include/asm-ia64/grant_table.h xen/include/asm-ia64/mm.h xen/include/asm-x86/grant_table.h xen/include/asm-x86/mm.h xen/include/public/memory.h xen/include/xen/mm.h
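
For orientation before the diff: a guest drives the new operation through
the existing memory_op hypercall. The sketch below is illustrative only;
HYPERVISOR_memory_op() is the usual guest-side wrapper and the function
name is invented here, while XENMEM_exchange, struct xen_memory_exchange
and set_xen_guest_handle() come from the public/memory.h hunk below.

/*
 * Illustrative sketch (not part of this changeset): swap 16 scattered
 * order-0 pages for one machine-contiguous order-4 extent addressable
 * in 31 bits. Non-zero extent orders require multipage allocation
 * privilege (e.g. dom0).
 */
static long exchange_for_low_extent(xen_pfn_t in_gmfns[16], xen_pfn_t *out_base)
{
    struct xen_memory_exchange exch;

    memset(&exch, 0, sizeof(exch));       /* nr_exchanged MUST start at 0 */

    exch.in.domid         = DOMID_SELF;   /* only DOMID_SELF is supported */
    exch.in.nr_extents    = 16;
    exch.in.extent_order  = 0;
    set_xen_guest_handle(exch.in.extent_start, in_gmfns);

    exch.out.domid        = DOMID_SELF;   /* must equal exch.in.domid     */
    exch.out.nr_extents   = 1;
    exch.out.extent_order = 4;            /* 16 << 0 == 1 << 4 pages      */
    exch.out.address_bits = 31;           /* may steer to the DMA pool    */
    set_xen_guest_handle(exch.out.extent_start, out_base);

    /* On success, *out_base is overwritten with the new extent's base. */
    return HYPERVISOR_memory_op(XENMEM_exchange, &exch);
}
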
line diff
     1.1 --- a/xen/arch/ia64/xen/domain.c	Fri Jun 16 10:52:49 2006 +0100
     1.2 +++ b/xen/arch/ia64/xen/domain.c	Fri Jun 16 14:43:54 2006 +0100
     1.3 @@ -1308,10 +1308,10 @@ destroy_grant_host_mapping(unsigned long
     1.4  //XXX heavily depends on the struct page layout.
     1.5  //XXX SMP
     1.6  int
     1.7 -steal_page_for_grant_transfer(struct domain *d, struct page_info *page)
     1.8 +steal_page(struct domain *d, struct page_info *page, unsigned int memflags)
     1.9  {
    1.10  #if 0 /* if big endian */
    1.11 -# error "implement big endian version of steal_page_for_grant_transfer()"
    1.12 +# error "implement big endian version of steal_page()"
    1.13  #endif
    1.14      u32 _d, _nd;
    1.15      u64 x, nx, y;
    1.16 @@ -1371,7 +1371,8 @@ steal_page_for_grant_transfer(struct dom
    1.17       * Unlink from 'd'. At least one reference remains (now anonymous), so
     1.18       * no one else is spinning to try to delete this page from 'd'.
    1.19       */
    1.20 -    d->tot_pages--;
    1.21 +    if ( !(memflags & MEMF_no_refcount) )
    1.22 +        d->tot_pages--;
    1.23      list_del(&page->list);
    1.24  
    1.25      spin_unlock(&d->page_alloc_lock);
     2.1 --- a/xen/arch/x86/domain_build.c	Fri Jun 16 10:52:49 2006 +0100
     2.2 +++ b/xen/arch/x86/domain_build.c	Fri Jun 16 14:43:54 2006 +0100
     2.3 @@ -374,7 +374,7 @@ int construct_dom0(struct domain *d,
     2.4       * Allocate from DMA pool: on i386 this ensures that our low-memory 1:1
     2.5       * mapping covers the allocation.
     2.6       */
     2.7 -    if ( (page = alloc_domheap_pages(d, order, ALLOC_DOM_DMA)) == NULL )
     2.8 +    if ( (page = alloc_domheap_pages(d, order, MEMF_dma)) == NULL )
     2.9          panic("Not enough RAM for domain 0 allocation.\n");
    2.10      alloc_spfn = page_to_mfn(page);
    2.11      alloc_epfn = alloc_spfn + d->tot_pages;
     3.1 --- a/xen/arch/x86/mm.c	Fri Jun 16 10:52:49 2006 +0100
     3.2 +++ b/xen/arch/x86/mm.c	Fri Jun 16 14:43:54 2006 +0100
     3.3 @@ -2598,8 +2598,8 @@ int destroy_grant_host_mapping(
     3.4      return destroy_grant_va_mapping(addr, frame);
     3.5  }
     3.6  
     3.7 -int steal_page_for_grant_transfer(
     3.8 -    struct domain *d, struct page_info *page)
     3.9 +int steal_page(
    3.10 +    struct domain *d, struct page_info *page, unsigned int memflags)
    3.11  {
    3.12      u32 _d, _nd, x, y;
    3.13  
    3.14 @@ -2636,7 +2636,8 @@ int steal_page_for_grant_transfer(
    3.15       * Unlink from 'd'. At least one reference remains (now anonymous), so 
     3.16       * no one else is spinning to try to delete this page from 'd'.
    3.17       */
    3.18 -    d->tot_pages--;
    3.19 +    if ( !(memflags & MEMF_no_refcount) )
    3.20 +        d->tot_pages--;
    3.21      list_del(&page->list);
    3.22  
    3.23      spin_unlock(&d->page_alloc_lock);
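
A note on the new memflags argument above: gnttab_transfer() (below) still
passes 0, since a transferred page really leaves the domain and tot_pages
must drop, whereas memory_exchange() passes MEMF_no_refcount on both the
steal and the later reassignment so the swap leaves the domain's allocation
count untouched. A hedged sketch of that invariant, with hypothetical
in_page/out_page of equal order:

/* Illustration only: in_page is owned by d, out_page is anonymous. */
unsigned int before = d->tot_pages;

if ( steal_page(d, in_page, MEMF_no_refcount) )       /* skips tot_pages-- */
    return -EINVAL;
if ( assign_pages(d, out_page, 0, MEMF_no_refcount) ) /* skips tot_pages++ */
    BUG();

ASSERT(d->tot_pages == before);          /* allocation stays balanced */
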
     4.1 --- a/xen/arch/x86/shadow.c	Fri Jun 16 10:52:49 2006 +0100
     4.2 +++ b/xen/arch/x86/shadow.c	Fri Jun 16 14:43:54 2006 +0100
     4.3 @@ -292,10 +292,10 @@ alloc_shadow_page(struct domain *d,
     4.4  #elif CONFIG_PAGING_LEVELS >= 3
     4.5          if ( d->arch.ops->guest_paging_levels == PAGING_L2 &&
     4.6               psh_type == PGT_l4_shadow )      /* allocated for PAE PDP page */
     4.7 -            page = alloc_domheap_pages(NULL, 0, ALLOC_DOM_DMA);
     4.8 +            page = alloc_domheap_pages(NULL, 0, MEMF_dma);
     4.9          else if ( d->arch.ops->guest_paging_levels == PAGING_L3 &&
    4.10                    (psh_type == PGT_l3_shadow || psh_type == PGT_l4_shadow) )
    4.11 -            page = alloc_domheap_pages(NULL, 0, ALLOC_DOM_DMA); /* allocated for PAE PDP page */
    4.12 +            page = alloc_domheap_pages(NULL, 0, MEMF_dma); /* allocated for PAE PDP page */
    4.13          else
    4.14              page = alloc_domheap_page(NULL);
    4.15  #endif
     5.1 --- a/xen/arch/x86/shadow_public.c	Fri Jun 16 10:52:49 2006 +0100
     5.2 +++ b/xen/arch/x86/shadow_public.c	Fri Jun 16 14:43:54 2006 +0100
     5.3 @@ -43,7 +43,7 @@ int shadow_direct_map_init(struct domain
     5.4      struct page_info *page;
     5.5      l3_pgentry_t *root;
     5.6  
     5.7 -    if ( !(page = alloc_domheap_pages(NULL, 0, ALLOC_DOM_DMA)) )
     5.8 +    if ( !(page = alloc_domheap_pages(NULL, 0, MEMF_dma)) )
     5.9          return 0;
    5.10  
    5.11      root = map_domain_page(page_to_mfn(page));
    5.12 @@ -395,7 +395,7 @@ static void alloc_monitor_pagetable(stru
    5.13  
    5.14      ASSERT(!pagetable_get_paddr(v->arch.monitor_table)); /* we should only get called once */
    5.15  
    5.16 -    m3mfn_info = alloc_domheap_pages(NULL, 0, ALLOC_DOM_DMA);
    5.17 +    m3mfn_info = alloc_domheap_pages(NULL, 0, MEMF_dma);
    5.18      ASSERT( m3mfn_info );
    5.19  
    5.20      m3mfn = page_to_mfn(m3mfn_info);
     6.1 --- a/xen/common/grant_table.c	Fri Jun 16 10:52:49 2006 +0100
     6.2 +++ b/xen/common/grant_table.c	Fri Jun 16 14:43:54 2006 +0100
     6.3 @@ -634,7 +634,7 @@ gnttab_transfer(
     6.4              goto copyback;
     6.5          }
     6.6  
     6.7 -        if ( steal_page_for_grant_transfer(d, page) < 0 )
     6.8 +        if ( steal_page(d, page, 0) < 0 )
     6.9          {
    6.10              gop.status = GNTST_bad_page;
    6.11              goto copyback;
     7.1 --- a/xen/common/memory.c	Fri Jun 16 10:52:49 2006 +0100
     7.2 +++ b/xen/common/memory.c	Fri Jun 16 14:43:54 2006 +0100
     7.3 @@ -34,7 +34,7 @@ increase_reservation(
     7.4      XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
     7.5      unsigned int   nr_extents,
     7.6      unsigned int   extent_order,
     7.7 -    unsigned int   flags,
     7.8 +    unsigned int   memflags,
     7.9      int           *preempted)
    7.10  {
    7.11      struct page_info *page;
    7.12 @@ -58,11 +58,11 @@ increase_reservation(
    7.13          }
    7.14  
    7.15          if ( unlikely((page = alloc_domheap_pages(
    7.16 -            d, extent_order, flags)) == NULL) )
    7.17 +            d, extent_order, memflags)) == NULL) )
    7.18          {
    7.19              DPRINTK("Could not allocate order=%d extent: "
    7.20 -                    "id=%d flags=%x (%ld of %d)\n",
    7.21 -                    extent_order, d->domain_id, flags, i, nr_extents);
    7.22 +                    "id=%d memflags=%x (%ld of %d)\n",
    7.23 +                    extent_order, d->domain_id, memflags, i, nr_extents);
    7.24              return i;
    7.25          }
    7.26  
    7.27 @@ -84,7 +84,7 @@ populate_physmap(
    7.28      XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
    7.29      unsigned int  nr_extents,
    7.30      unsigned int  extent_order,
    7.31 -    unsigned int  flags,
    7.32 +    unsigned int  memflags,
    7.33      int          *preempted)
    7.34  {
    7.35      struct page_info *page;
    7.36 @@ -111,11 +111,11 @@ populate_physmap(
    7.37              goto out;
    7.38  
    7.39          if ( unlikely((page = alloc_domheap_pages(
    7.40 -            d, extent_order, flags)) == NULL) )
    7.41 +            d, extent_order, memflags)) == NULL) )
    7.42          {
    7.43              DPRINTK("Could not allocate order=%d extent: "
    7.44 -                    "id=%d flags=%x (%ld of %d)\n",
    7.45 -                    extent_order, d->domain_id, flags, i, nr_extents);
    7.46 +                    "id=%d memflags=%x (%ld of %d)\n",
    7.47 +                    extent_order, d->domain_id, memflags, i, nr_extents);
    7.48              goto out;
    7.49          }
    7.50  
    7.51 @@ -183,7 +183,6 @@ decrease_reservation(
    7.52      XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
    7.53      unsigned int   nr_extents,
    7.54      unsigned int   extent_order,
    7.55 -    unsigned int   flags,
    7.56      int           *preempted)
    7.57  {
    7.58      unsigned long i, j;
    7.59 @@ -276,10 +275,234 @@ translate_gpfn_list(
    7.60      return 0;
    7.61  }
    7.62  
    7.63 +static long
    7.64 +memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
    7.65 +{
    7.66 +    struct xen_memory_exchange exch;
    7.67 +    LIST_HEAD(in_chunk_list);
    7.68 +    LIST_HEAD(out_chunk_list);
    7.69 +    unsigned long in_chunk_order, out_chunk_order;
    7.70 +    unsigned long gpfn, gmfn, mfn;
    7.71 +    unsigned long i, j, k;
    7.72 +    unsigned int  memflags = 0;
    7.73 +    long          rc = 0;
    7.74 +    struct domain *d;
    7.75 +    struct page_info *page;
    7.76 +
    7.77 +    if ( copy_from_guest(&exch, arg, 1) )
    7.78 +        return -EFAULT;
    7.79 +
    7.80 +    /* Various sanity checks. */
    7.81 +    if ( (exch.nr_exchanged > exch.in.nr_extents) ||
    7.82 +         /* Input and output domain identifiers match? */
    7.83 +         (exch.in.domid != exch.out.domid) ||
    7.84 +         /* Sizes of input and output lists do not overflow a long? */
    7.85 +         ((~0UL >> exch.in.extent_order) < exch.in.nr_extents) ||
    7.86 +         ((~0UL >> exch.out.extent_order) < exch.out.nr_extents) ||
    7.87 +         /* Sizes of input and output lists match? */
    7.88 +         ((exch.in.nr_extents << exch.in.extent_order) !=
    7.89 +          (exch.out.nr_extents << exch.out.extent_order)) )
    7.90 +    {
    7.91 +        rc = -EINVAL;
    7.92 +        goto fail_early;
    7.93 +    }
    7.94 +
    7.95 +    /* Only privileged guests can allocate multi-page contiguous extents. */
    7.96 +    if ( ((exch.in.extent_order != 0) || (exch.out.extent_order != 0)) &&
    7.97 +         !multipage_allocation_permitted(current->domain) )
    7.98 +    {
    7.99 +        rc = -EPERM;
   7.100 +        goto fail_early;
   7.101 +    }
   7.102 +
   7.103 +    if ( (exch.out.address_bits != 0) &&
   7.104 +         (exch.out.address_bits <
   7.105 +          (get_order_from_pages(max_page) + PAGE_SHIFT)) )
   7.106 +    {
   7.107 +        if ( exch.out.address_bits < 31 )
   7.108 +        {
   7.109 +            rc = -ENOMEM;
   7.110 +            goto fail_early;
   7.111 +        }
   7.112 +        memflags = MEMF_dma;
   7.113 +    }
   7.114 +
   7.115 +    guest_handle_add_offset(exch.in.extent_start, exch.nr_exchanged);
   7.116 +    exch.in.nr_extents -= exch.nr_exchanged;
   7.117 +
   7.118 +    if ( exch.in.extent_order <= exch.out.extent_order )
   7.119 +    {
   7.120 +        in_chunk_order  = exch.out.extent_order - exch.in.extent_order;
   7.121 +        out_chunk_order = 0;
   7.122 +        guest_handle_add_offset(
   7.123 +            exch.out.extent_start, exch.nr_exchanged >> in_chunk_order);
   7.124 +        exch.out.nr_extents -= exch.nr_exchanged >> in_chunk_order;
   7.125 +    }
   7.126 +    else
   7.127 +    {
   7.128 +        in_chunk_order  = 0;
   7.129 +        out_chunk_order = exch.in.extent_order - exch.out.extent_order;
   7.130 +        guest_handle_add_offset(
   7.131 +            exch.out.extent_start, exch.nr_exchanged << out_chunk_order);
   7.132 +        exch.out.nr_extents -= exch.nr_exchanged << out_chunk_order;
   7.133 +    }
   7.134 +
   7.135 +    /*
   7.136 +     * Only support exchange on calling domain right now. Otherwise there are
   7.137 +     * tricky corner cases to consider (e.g., DOMF_dying domain).
   7.138 +     */
   7.139 +    if ( unlikely(exch.in.domid != DOMID_SELF) )
   7.140 +    {
   7.141 +        rc = IS_PRIV(current->domain) ? -EINVAL : -EPERM;
   7.142 +        goto fail_early;
   7.143 +    }
   7.144 +    d = current->domain;
   7.145 +
   7.146 +    for ( i = 0; i < (exch.in.nr_extents >> in_chunk_order); i++ )
   7.147 +    {
   7.148 +        if ( hypercall_preempt_check() )
   7.149 +        {
   7.150 +            exch.nr_exchanged += i << in_chunk_order;
   7.151 +            if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
   7.152 +                return -EFAULT;
   7.153 +            return hypercall_create_continuation(
   7.154 +                __HYPERVISOR_memory_op, "lh", XENMEM_exchange, arg);
   7.155 +        }
   7.156 +
   7.157 +        /* Steal a chunk's worth of input pages from the domain. */
   7.158 +        for ( j = 0; j < (1UL << in_chunk_order); j++ )
   7.159 +        {
   7.160 +            if ( unlikely(__copy_from_guest_offset(
   7.161 +                &gmfn, exch.in.extent_start, (i<<in_chunk_order)+j, 1)) )
   7.162 +            {
   7.163 +                rc = -EFAULT;
   7.164 +                goto fail;
   7.165 +            }
   7.166 +
   7.167 +            for ( k = 0; k < (1UL << exch.in.extent_order); k++ )
   7.168 +            {
   7.169 +                mfn = gmfn_to_mfn(d, gmfn + k);
   7.170 +                if ( unlikely(!mfn_valid(mfn)) )
   7.171 +                {
   7.172 +                    rc = -EINVAL;
   7.173 +                    goto fail;
   7.174 +                }
   7.175 +
   7.176 +                page = mfn_to_page(mfn);
   7.177 +
   7.178 +                if ( unlikely(steal_page(d, page, MEMF_no_refcount)) )
   7.179 +                {
   7.180 +                    rc = -EINVAL;
   7.181 +                    goto fail;
   7.182 +                }
   7.183 +
   7.184 +                list_add(&page->list, &in_chunk_list);
   7.185 +            }
   7.186 +        }
   7.187 +
   7.188 +        /* Allocate a chunk's worth of anonymous output pages. */
   7.189 +        for ( j = 0; j < (1UL << out_chunk_order); j++ )
   7.190 +        {
   7.191 +            page = alloc_domheap_pages(
   7.192 +                NULL, exch.out.extent_order, memflags);
   7.193 +            if ( unlikely(page == NULL) )
   7.194 +            {
   7.195 +                rc = -ENOMEM;
   7.196 +                goto fail;
   7.197 +            }
   7.198 +
   7.199 +            list_add(&page->list, &out_chunk_list);
   7.200 +        }
   7.201 +
   7.202 +        /*
   7.203 +         * Success! Beyond this point we cannot fail for this chunk.
   7.204 +         */
   7.205 +
   7.206 +        /* Destroy final reference to each input page. */
   7.207 +        while ( !list_empty(&in_chunk_list) )
   7.208 +        {
   7.209 +            page = list_entry(in_chunk_list.next, struct page_info, list);
   7.210 +            list_del(&page->list);
   7.211 +            if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
   7.212 +                BUG();
   7.213 +            mfn = page_to_mfn(page);
   7.214 +            guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn);
   7.215 +            put_page(page);
   7.216 +        }
   7.217 +
   7.218 +        /* Assign each output page to the domain. */
   7.219 +        j = 0;
   7.220 +        while ( !list_empty(&out_chunk_list) )
   7.221 +        {
   7.222 +            page = list_entry(out_chunk_list.next, struct page_info, list);
   7.223 +            list_del(&page->list);
   7.224 +            if ( assign_pages(d, page, exch.out.extent_order,
   7.225 +                              MEMF_no_refcount) )
   7.226 +                BUG();
   7.227 +
   7.228 +            /* Note that we ignore errors accessing the output extent list. */
   7.229 +            (void)__copy_from_guest_offset(
   7.230 +                &gpfn, exch.out.extent_start, (i<<out_chunk_order)+j, 1);
   7.231 +
   7.232 +            mfn = page_to_mfn(page);
   7.233 +            if ( unlikely(shadow_mode_translate(d)) )
   7.234 +            {
   7.235 +                for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
   7.236 +                    guest_physmap_add_page(d, gpfn + k, mfn + k);
   7.237 +            }
   7.238 +            else
   7.239 +            {
   7.240 +                for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
   7.241 +                    set_gpfn_from_mfn(mfn + k, gpfn + k);
   7.242 +                (void)__copy_to_guest_offset(
   7.243 +                    exch.out.extent_start, (i<<out_chunk_order)+j, &mfn, 1);
   7.244 +            }
   7.245 +
   7.246 +            j++;
   7.247 +        }
   7.248 +        BUG_ON(j != (1UL << out_chunk_order));
   7.249 +    }
   7.250 +
   7.251 +    exch.nr_exchanged += exch.in.nr_extents;
   7.252 +    if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
   7.253 +        rc = -EFAULT;
   7.254 +    return rc;
   7.255 +
   7.256 +    /*
   7.257 +     * Failed a chunk! Free any partial chunk work. Tell caller how many
   7.258 +     * chunks succeeded.
   7.259 +     */
   7.260 + fail:
   7.261 +    /* Reassign any input pages we managed to steal. */
   7.262 +    while ( !list_empty(&in_chunk_list) )
   7.263 +    {
   7.264 +        page = list_entry(in_chunk_list.next, struct page_info, list);
   7.265 +        list_del(&page->list);
   7.266 +        if ( assign_pages(d, page, 0, MEMF_no_refcount) )
   7.267 +            BUG();
   7.268 +    }
   7.269 +
   7.270 +    /* Free any output pages we managed to allocate. */
   7.271 +    while ( !list_empty(&out_chunk_list) )
   7.272 +    {
   7.273 +        page = list_entry(out_chunk_list.next, struct page_info, list);
   7.274 +        list_del(&page->list);
   7.275 +        free_domheap_pages(page, exch.out.extent_order);
   7.276 +    }
   7.277 +
   7.278 +    exch.nr_exchanged += i << in_chunk_order;
   7.279 +
   7.280 + fail_early:
   7.281 +    if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
   7.282 +        rc = -EFAULT;
   7.283 +    return rc;
   7.284 +}
   7.285 +
   7.286  long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg)
   7.287  {
   7.288      struct domain *d;
   7.289 -    int rc, op, flags = 0, preempted = 0;
   7.290 +    int rc, op, preempted = 0;
   7.291 +    unsigned int memflags = 0;
   7.292      unsigned long start_extent, progress;
   7.293      struct xen_memory_reservation reservation;
   7.294      domid_t domid;
   7.295 @@ -291,16 +514,17 @@ long do_memory_op(unsigned long cmd, XEN
   7.296      case XENMEM_increase_reservation:
   7.297      case XENMEM_decrease_reservation:
   7.298      case XENMEM_populate_physmap:
   7.299 +        start_extent = cmd >> START_EXTENT_SHIFT;
   7.300 +
   7.301          if ( copy_from_guest(&reservation, arg, 1) )
   7.302 -            return -EFAULT;
   7.303 +            return start_extent;
   7.304  
   7.305          /* Is size too large for us to encode a continuation? */
   7.306          if ( reservation.nr_extents > (ULONG_MAX >> START_EXTENT_SHIFT) )
   7.307 -            return -EINVAL;
   7.308 +            return start_extent;
   7.309  
   7.310 -        start_extent = cmd >> START_EXTENT_SHIFT;
   7.311          if ( unlikely(start_extent > reservation.nr_extents) )
   7.312 -            return -EINVAL;
   7.313 +            return start_extent;
   7.314  
   7.315          if ( !guest_handle_is_null(reservation.extent_start) )
   7.316              guest_handle_add_offset(reservation.extent_start, start_extent);
   7.317 @@ -311,16 +535,15 @@ long do_memory_op(unsigned long cmd, XEN
   7.318                (get_order_from_pages(max_page) + PAGE_SHIFT)) )
   7.319          {
   7.320              if ( reservation.address_bits < 31 )
   7.321 -                return -ENOMEM;
   7.322 -            flags = ALLOC_DOM_DMA;
   7.323 +                return start_extent;
   7.324 +            memflags = MEMF_dma;
   7.325          }
   7.326  
   7.327          if ( likely(reservation.domid == DOMID_SELF) )
   7.328              d = current->domain;
   7.329 -        else if ( !IS_PRIV(current->domain) )
   7.330 -            return -EPERM;
   7.331 -        else if ( (d = find_domain_by_id(reservation.domid)) == NULL )
   7.332 -            return -ESRCH;
   7.333 +        else if ( !IS_PRIV(current->domain) ||
   7.334 +                  ((d = find_domain_by_id(reservation.domid)) == NULL) )
   7.335 +            return start_extent;
   7.336  
   7.337          switch ( op )
   7.338          {
   7.339 @@ -330,7 +553,7 @@ long do_memory_op(unsigned long cmd, XEN
   7.340                  reservation.extent_start,
   7.341                  reservation.nr_extents,
   7.342                  reservation.extent_order,
   7.343 -                flags,
   7.344 +                memflags,
   7.345                  &preempted);
   7.346              break;
   7.347          case XENMEM_decrease_reservation:
   7.348 @@ -339,7 +562,6 @@ long do_memory_op(unsigned long cmd, XEN
   7.349                  reservation.extent_start,
   7.350                  reservation.nr_extents,
   7.351                  reservation.extent_order,
   7.352 -                flags,
   7.353                  &preempted);
   7.354              break;
   7.355          case XENMEM_populate_physmap:
   7.356 @@ -349,7 +571,7 @@ long do_memory_op(unsigned long cmd, XEN
   7.357                  reservation.extent_start,
   7.358                  reservation.nr_extents,
   7.359                  reservation.extent_order,
   7.360 -                flags,
   7.361 +                memflags,
   7.362                  &preempted);
   7.363              break;
   7.364          }
   7.365 @@ -366,6 +588,10 @@ long do_memory_op(unsigned long cmd, XEN
   7.366  
   7.367          break;
   7.368  
   7.369 +    case XENMEM_exchange:
   7.370 +        rc = memory_exchange(guest_handle_cast(arg, xen_memory_exchange_t));
   7.371 +        break;
   7.372 +
   7.373      case XENMEM_maximum_ram_page:
   7.374          rc = max_page;
   7.375          break;
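
The in_chunk_order/out_chunk_order arithmetic in memory_exchange() above is
easiest to verify with concrete numbers; a worked example:

/*
 * Worked example (illustrative numbers): exchange 8 order-1 input extents
 * for 2 order-3 output extents, i.e. 8 << 1 == 2 << 3 == 16 pages a side.
 *
 *   in.extent_order (1) <= out.extent_order (3), so:
 *     in_chunk_order  = 3 - 1 = 2   ->  one chunk = 4 input extents
 *     out_chunk_order = 0           ->  one chunk = 1 output extent
 *
 *   The main loop runs nr_extents >> in_chunk_order = 8 >> 2 = 2 chunks.
 *   Each chunk steals 4 << 1 = 8 input pages and allocates 1 << 3 = 8
 *   output pages, so preemption between chunks never leaves an output
 *   extent half-built, and nr_exchanged always advances by whole input
 *   extents (i << in_chunk_order of them).
 */
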
     8.1 --- a/xen/common/page_alloc.c	Fri Jun 16 10:52:49 2006 +0100
     8.2 +++ b/xen/common/page_alloc.c	Fri Jun 16 14:43:54 2006 +0100
     8.3 @@ -531,16 +531,66 @@ void init_domheap_pages(paddr_t ps, padd
     8.4  }
     8.5  
     8.6  
     8.7 +int assign_pages(
     8.8 +    struct domain *d,
     8.9 +    struct page_info *pg,
    8.10 +    unsigned int order,
    8.11 +    unsigned int memflags)
    8.12 +{
    8.13 +    unsigned long i;
    8.14 +
    8.15 +    spin_lock(&d->page_alloc_lock);
    8.16 +
    8.17 +    if ( unlikely(test_bit(_DOMF_dying, &d->domain_flags)) )
    8.18 +    {
    8.19 +        DPRINTK("Cannot assign page to domain%d -- dying.\n", d->domain_id);
    8.20 +        goto fail;
    8.21 +    }
    8.22 +
    8.23 +    if ( !(memflags & MEMF_no_refcount) )
    8.24 +    {
    8.25 +        if ( unlikely((d->tot_pages + (1 << order)) > d->max_pages) )
    8.26 +        {
    8.27 +            DPRINTK("Over-allocation for domain %u: %u > %u\n",
    8.28 +                    d->domain_id, d->tot_pages + (1 << order), d->max_pages);
    8.29 +            goto fail;
    8.30 +        }
    8.31 +
    8.32 +        if ( unlikely(d->tot_pages == 0) )
    8.33 +            get_knownalive_domain(d);
    8.34 +
    8.35 +        d->tot_pages += 1 << order;
    8.36 +    }
    8.37 +
    8.38 +    for ( i = 0; i < (1 << order); i++ )
    8.39 +    {
    8.40 +        ASSERT(page_get_owner(&pg[i]) == NULL);
    8.41 +        ASSERT((pg[i].count_info & ~(PGC_allocated | 1)) == 0);
    8.42 +        page_set_owner(&pg[i], d);
    8.43 +        wmb(); /* Domain pointer must be visible before updating refcnt. */
    8.44 +        pg[i].count_info = PGC_allocated | 1;
    8.45 +        list_add_tail(&pg[i].list, &d->page_list);
    8.46 +    }
    8.47 +
    8.48 +    spin_unlock(&d->page_alloc_lock);
    8.49 +    return 0;
    8.50 +
    8.51 + fail:
    8.52 +    spin_unlock(&d->page_alloc_lock);
    8.53 +    return -1;
    8.54 +}
    8.55 +
    8.56 +
    8.57  struct page_info *alloc_domheap_pages(
    8.58 -    struct domain *d, unsigned int order, unsigned int flags)
    8.59 +    struct domain *d, unsigned int order, unsigned int memflags)
    8.60  {
    8.61      struct page_info *pg = NULL;
    8.62      cpumask_t mask;
    8.63 -    int i;
    8.64 +    unsigned long i;
    8.65  
    8.66      ASSERT(!in_irq());
    8.67  
    8.68 -    if ( !(flags & ALLOC_DOM_DMA) )
    8.69 +    if ( !(memflags & MEMF_dma) )
    8.70      {
    8.71          pg = alloc_heap_pages(MEMZONE_DOM, order);
    8.72          /* Failure? Then check if we can fall back to the DMA pool. */
    8.73 @@ -582,37 +632,11 @@ struct page_info *alloc_domheap_pages(
    8.74          flush_tlb_mask(mask);
    8.75      }
    8.76  
    8.77 -    if ( d == NULL )
    8.78 -        return pg;
    8.79 -
    8.80 -    spin_lock(&d->page_alloc_lock);
    8.81 -
    8.82 -    if ( unlikely(test_bit(_DOMF_dying, &d->domain_flags)) ||
    8.83 -         unlikely((d->tot_pages + (1 << order)) > d->max_pages) )
    8.84 +    if ( (d != NULL) && assign_pages(d, pg, order, memflags) )
    8.85      {
    8.86 -        DPRINTK("Over-allocation for domain %u: %u > %u\n",
    8.87 -                d->domain_id, d->tot_pages + (1 << order), d->max_pages);
    8.88 -        DPRINTK("...or the domain is dying (%d)\n", 
    8.89 -                !!test_bit(_DOMF_dying, &d->domain_flags));
    8.90 -        spin_unlock(&d->page_alloc_lock);
    8.91          free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
    8.92          return NULL;
    8.93      }
    8.94 -
    8.95 -    if ( unlikely(d->tot_pages == 0) )
    8.96 -        get_knownalive_domain(d);
    8.97 -
    8.98 -    d->tot_pages += 1 << order;
    8.99 -
   8.100 -    for ( i = 0; i < (1 << order); i++ )
   8.101 -    {
   8.102 -        page_set_owner(&pg[i], d);
   8.103 -        wmb(); /* Domain pointer must be visible before updating refcnt. */
   8.104 -        pg[i].count_info |= PGC_allocated | 1;
   8.105 -        list_add_tail(&pg[i].list, &d->page_list);
   8.106 -    }
   8.107 -
   8.108 -    spin_unlock(&d->page_alloc_lock);
   8.109      
   8.110      return pg;
   8.111  }
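
assign_pages() above is the ownership half of the old alloc_domheap_pages()
body, split out so that callers such as memory_exchange() can allocate
anonymously first and assign later. A minimal sketch of the split API,
assuming a valid struct domain *d and a page order:

struct page_info *pg = alloc_domheap_pages(NULL, order, MEMF_dma);
if ( pg == NULL )
    return -ENOMEM;
if ( assign_pages(d, pg, order, 0) )  /* refcounted: tot_pages += 1 << order */
{
    /* Page owner is still NULL, so this frees straight back to the heap. */
    free_domheap_pages(pg, order);
    return -ENOMEM;
}
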
     9.1 --- a/xen/include/asm-ia64/grant_table.h	Fri Jun 16 10:52:49 2006 +0100
     9.2 +++ b/xen/include/asm-ia64/grant_table.h	Fri Jun 16 14:43:54 2006 +0100
     9.3 @@ -12,16 +12,12 @@
     9.4  #define create_grant_host_mapping(a, f, fl)  0
     9.5  #define destroy_grant_host_mapping(a, f, fl) 0
     9.6  
     9.7 -// for grant transfer
     9.8 -#define steal_page_for_grant_transfer(d, p)  0
     9.9 -
    9.10  #else
    9.11  // for grant map/unmap
    9.12  int create_grant_host_mapping(unsigned long gpaddr, unsigned long mfn, unsigned int flags);
    9.13  int destroy_grant_host_mapping(unsigned long gpaddr, unsigned long mfn, unsigned int flags);
    9.14  
    9.15  // for grant transfer
    9.16 -int steal_page_for_grant_transfer(struct domain *d, struct page_info *page);
    9.17  void guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
    9.18  
    9.19  #endif
    10.1 --- a/xen/include/asm-ia64/mm.h	Fri Jun 16 10:52:49 2006 +0100
    10.2 +++ b/xen/include/asm-ia64/mm.h	Fri Jun 16 14:43:54 2006 +0100
    10.3 @@ -474,4 +474,11 @@ extern unsigned long ____lookup_domain_m
    10.4  /* Arch-specific portion of memory_op hypercall. */
    10.5  #define arch_memory_op(op, arg) (-ENOSYS)
    10.6  
    10.7 +#ifndef CONFIG_XEN_IA64_DOM0_VP
    10.8 +#define steal_page(d, p, f)  0
    10.9 +#else
   10.10 +int steal_page(
   10.11 +    struct domain *d, struct page_info *page, unsigned int memflags);
   10.12 +#endif
   10.13 +
   10.14  #endif /* __ASM_IA64_MM_H__ */
    11.1 --- a/xen/include/asm-x86/grant_table.h	Fri Jun 16 10:52:49 2006 +0100
    11.2 +++ b/xen/include/asm-x86/grant_table.h	Fri Jun 16 14:43:54 2006 +0100
    11.3 @@ -18,9 +18,6 @@ int create_grant_host_mapping(
    11.4  int destroy_grant_host_mapping(
    11.5      unsigned long addr, unsigned long frame, unsigned int flags);
    11.6  
    11.7 -int steal_page_for_grant_transfer(
    11.8 -    struct domain *d, struct page_info *page);
    11.9 -
   11.10  #define gnttab_create_shared_page(d, t, i)                               \
   11.11      do {                                                                 \
   11.12          share_xen_page_with_guest(                                       \
    12.1 --- a/xen/include/asm-x86/mm.h	Fri Jun 16 10:52:49 2006 +0100
    12.2 +++ b/xen/include/asm-x86/mm.h	Fri Jun 16 14:43:54 2006 +0100
    12.3 @@ -389,4 +389,7 @@ int __sync_lazy_execstate(void);
    12.4  long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
    12.5  long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
    12.6  
    12.7 +int steal_page(
    12.8 +    struct domain *d, struct page_info *page, unsigned int memflags);
    12.9 +
   12.10  #endif /* __ASM_X86_MM_H__ */
    13.1 --- a/xen/include/public/memory.h	Fri Jun 16 10:52:49 2006 +0100
    13.2 +++ b/xen/include/public/memory.h	Fri Jun 16 14:43:54 2006 +0100
    13.3 @@ -10,8 +10,8 @@
    13.4  #define __XEN_PUBLIC_MEMORY_H__
    13.5  
    13.6  /*
    13.7 - * Increase or decrease the specified domain's memory reservation. Returns a
    13.8 - * -ve errcode on failure, or the # extents successfully allocated or freed.
    13.9 + * Increase or decrease the specified domain's memory reservation. Returns the
   13.10 + * number of extents successfully allocated or freed.
   13.11   * arg == addr of struct xen_memory_reservation.
   13.12   */
   13.13  #define XENMEM_increase_reservation 0
   13.14 @@ -48,12 +48,54 @@ struct xen_memory_reservation {
   13.15       * Unprivileged domains can specify only DOMID_SELF.
   13.16       */
   13.17      domid_t        domid;
   13.18 -
   13.19  };
   13.20  typedef struct xen_memory_reservation xen_memory_reservation_t;
   13.21  DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t);
   13.22  
   13.23  /*
   13.24 + * An atomic exchange of memory pages. If return code is zero then
    13.25 + * @out.extent_start provides GMFNs of the newly-allocated memory.
   13.26 + * Returns zero on complete success, otherwise a negative error code.
    13.27 + * On complete success, @nr_exchanged == @in.nr_extents.
    13.28 + * On partial success, @nr_exchanged indicates how much work was done.
   13.29 + */
   13.30 +#define XENMEM_exchange             11
   13.31 +struct xen_memory_exchange {
   13.32 +    /*
   13.33 +     * [IN] Details of memory extents to be exchanged (GMFN bases).
   13.34 +     * Note that @in.address_bits is ignored and unused.
   13.35 +     */
   13.36 +    struct xen_memory_reservation in;
   13.37 +
   13.38 +    /*
   13.39 +     * [IN/OUT] Details of new memory extents.
   13.40 +     * We require that:
   13.41 +     *  1. @in.domid == @out.domid
   13.42 +     *  2. @in.nr_extents  << @in.extent_order == 
   13.43 +     *     @out.nr_extents << @out.extent_order
   13.44 +     *  3. @in.extent_start and @out.extent_start lists must not overlap
   13.45 +     *  4. @out.extent_start lists GPFN bases to be populated
   13.46 +     *  5. @out.extent_start is overwritten with allocated GMFN bases
   13.47 +     */
   13.48 +    struct xen_memory_reservation out;
   13.49 +
   13.50 +    /*
   13.51 +     * [OUT] Number of input extents that were successfully exchanged:
   13.52 +     *  1. The first @nr_exchanged input extents were successfully
   13.53 +     *     deallocated.
   13.54 +     *  2. The corresponding first entries in the output extent list correctly
   13.55 +     *     indicate the GMFNs that were successfully exchanged.
   13.56 +     *  3. All other input and output extents are untouched.
    13.57 +     *  4. If not all input extents are exchanged then the return code of this
   13.58 +     *     command will be non-zero.
   13.59 +     *  5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER!
   13.60 +     */
   13.61 +    unsigned long nr_exchanged;
   13.62 +};
   13.63 +typedef struct xen_memory_exchange xen_memory_exchange_t;
   13.64 +DEFINE_XEN_GUEST_HANDLE(xen_memory_exchange_t);
   13.65 +
   13.66 +/*
   13.67   * Returns the maximum machine frame number of mapped RAM in this system.
   13.68   * This command always succeeds (it never returns an error code).
   13.69   * arg == NULL.
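
Caller-side view of the nr_exchanged contract defined above, as a hedged
sketch; handle_partial_exchange() is hypothetical and HYPERVISOR_memory_op()
is the usual guest wrapper:

static long do_exchange(struct xen_memory_exchange *exch)
{
    long rc;

    exch->nr_exchanged = 0;              /* rule 5: caller must zero this */
    rc = HYPERVISOR_memory_op(XENMEM_exchange, exch);
    if ( rc != 0 )
        /* Only the first nr_exchanged input extents (and the matching
         * prefix of the output list) were swapped; all other entries in
         * both lists are untouched. */
        handle_partial_exchange(exch, rc);
    return rc;
}
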
    14.1 --- a/xen/include/xen/mm.h	Fri Jun 16 10:52:49 2006 +0100
    14.2 +++ b/xen/include/xen/mm.h	Fri Jun 16 14:43:54 2006 +0100
    14.3 @@ -60,13 +60,23 @@ void free_xenheap_pages(void *v, unsigne
    14.4  /* Domain suballocator. These functions are *not* interrupt-safe.*/
    14.5  void init_domheap_pages(paddr_t ps, paddr_t pe);
    14.6  struct page_info *alloc_domheap_pages(
    14.7 -    struct domain *d, unsigned int order, unsigned int flags);
    14.8 +    struct domain *d, unsigned int order, unsigned int memflags);
    14.9  void free_domheap_pages(struct page_info *pg, unsigned int order);
   14.10  unsigned long avail_domheap_pages(void);
   14.11  #define alloc_domheap_page(d) (alloc_domheap_pages(d,0,0))
   14.12  #define free_domheap_page(p)  (free_domheap_pages(p,0))
   14.13  
   14.14 -#define ALLOC_DOM_DMA 1
   14.15 +int assign_pages(
   14.16 +    struct domain *d,
   14.17 +    struct page_info *pg,
   14.18 +    unsigned int order,
   14.19 +    unsigned int memflags);
   14.20 +
   14.21 +/* memflags: */
   14.22 +#define _MEMF_dma         0
   14.23 +#define  MEMF_dma         (1U<<_MEMF_dma)
   14.24 +#define _MEMF_no_refcount 1
   14.25 +#define  MEMF_no_refcount (1U<<_MEMF_no_refcount)
   14.26  
   14.27  #ifdef CONFIG_PAGEALLOC_MAX_ORDER
   14.28  #define MAX_ORDER CONFIG_PAGEALLOC_MAX_ORDER
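
The renamed flags follow the bit-index/mask convention used elsewhere in
Xen: _MEMF_* names a bit position, MEMF_* the corresponding mask, so flags
combine with | and test with &. A trivial illustration:

unsigned int memflags = MEMF_dma | MEMF_no_refcount;   /* == 0x3 */
int use_dma_pool  = !!(memflags & MEMF_dma);           /* 1 */
int skip_refcount = !!(memflags & MEMF_no_refcount);   /* 1 */
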