direct-io.hg

changeset 10422:0d1dab1d9b67

merge with xen-unstable.hg
author awilliam@xenbuild.aw
date Fri Jun 16 10:18:54 2006 -0600 (2006-06-16)
parents 81d35c0d964f ee3d10828937
children 05ab081f3c67 dcb50b04faec
files xen/arch/ia64/xen/domain.c xen/arch/ia64/xen/mm.c xen/arch/x86/domain_build.c xen/arch/x86/mm.c xen/arch/x86/shadow.c xen/arch/x86/shadow_public.c xen/common/grant_table.c xen/common/memory.c xen/common/page_alloc.c xen/include/asm-ia64/grant_table.h xen/include/asm-ia64/mm.h xen/include/asm-x86/grant_table.h xen/include/asm-x86/mm.h xen/include/public/memory.h xen/include/xen/mm.h
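
This merge pulls several related memory-management changes in from xen-unstable: steal_page_for_grant_transfer() is renamed to steal_page() and gains a memflags parameter; the allocator flag ALLOC_DOM_DMA is replaced by a new memflags namespace (MEMF_dma, MEMF_no_refcount); per-domain page accounting is split out of alloc_domheap_pages() into a new assign_pages() helper; and a new XENMEM_exchange hypercall is introduced that atomically trades a domain's existing pages for freshly allocated ones (e.g. to obtain memory below a given address boundary).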
line diff
     2.1 --- a/xen/arch/ia64/xen/mm.c	Fri Jun 16 09:10:08 2006 -0600
     2.2 +++ b/xen/arch/ia64/xen/mm.c	Fri Jun 16 10:18:54 2006 -0600
     2.3 @@ -790,7 +790,7 @@ assign_domain_page_replace(struct domain
     2.4  }
     2.5  
     2.6  // caller must get_page(new_page) before
     2.7 -// Only steal_page_for_grant_transfer() calls this function.
     2.8 +// Only steal_page() calls this function.
     2.9  static int
    2.10  assign_domain_page_cmpxchg_rel(struct domain* d, unsigned long mpaddr,
    2.11                                 struct page_info* old_page,
    2.12 @@ -1002,10 +1002,10 @@ destroy_grant_host_mapping(unsigned long
    2.13  
    2.14  // heavily depends on the struct page layout.
    2.15  int
    2.16 -steal_page_for_grant_transfer(struct domain *d, struct page_info *page)
    2.17 +steal_page(struct domain *d, struct page_info *page, unsigned int memflags)
    2.18  {
    2.19  #if 0 /* if big endian */
    2.20 -# error "implement big endian version of steal_page_for_grant_transfer()"
    2.21 +# error "implement big endian version of steal_page()"
    2.22  #endif
    2.23      u32 _d, _nd;
    2.24      u64 x, nx, y;
    2.25 @@ -1095,7 +1095,8 @@ steal_page_for_grant_transfer(struct dom
    2.26       * Unlink from 'd'. At least one reference remains (now anonymous), so
    2.27       * no one else is spinning to try to delete this page from 'd'.
    2.28       */
    2.29 -    d->tot_pages--;
    2.30 +    if ( !(memflags & MEMF_no_refcount) )
    2.31 +        d->tot_pages--;
    2.32      list_del(&page->list);
    2.33  
    2.34      spin_unlock(&d->page_alloc_lock);
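
The new memflags parameter exists so that a caller which manages the domain's page accounting itself can suppress the tot_pages update. A minimal sketch of the intended pairing, in hypervisor context: exchange_one_page() is hypothetical, but steal_page(), assign_pages() (defined later in this changeset, in page_alloc.c) and MEMF_no_refcount are interfaces the changeset introduces.

    /* Hypothetical wrapper: swap one of d's pages for a replacement without
     * perturbing d->tot_pages. Stealing with MEMF_no_refcount skips the
     * decrement; assigning with the same flag skips the increment, so the
     * domain's reservation stays balanced across the swap. */
    static int exchange_one_page(struct domain *d,
                                 struct page_info *old_page,
                                 struct page_info *new_page)
    {
        if ( steal_page(d, old_page, MEMF_no_refcount) )
            return -EINVAL;               /* d->tot_pages left unchanged */
        if ( assign_pages(d, new_page, 0, MEMF_no_refcount) )
            BUG();                        /* d->tot_pages left unchanged */
        return 0;
    }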
     3.1 --- a/xen/arch/x86/domain_build.c	Fri Jun 16 09:10:08 2006 -0600
     3.2 +++ b/xen/arch/x86/domain_build.c	Fri Jun 16 10:18:54 2006 -0600
     3.3 @@ -374,7 +374,7 @@ int construct_dom0(struct domain *d,
     3.4       * Allocate from DMA pool: on i386 this ensures that our low-memory 1:1
     3.5       * mapping covers the allocation.
     3.6       */
     3.7 -    if ( (page = alloc_domheap_pages(d, order, ALLOC_DOM_DMA)) == NULL )
     3.8 +    if ( (page = alloc_domheap_pages(d, order, MEMF_dma)) == NULL )
     3.9          panic("Not enough RAM for domain 0 allocation.\n");
    3.10      alloc_spfn = page_to_mfn(page);
    3.11      alloc_epfn = alloc_spfn + d->tot_pages;
     4.1 --- a/xen/arch/x86/mm.c	Fri Jun 16 09:10:08 2006 -0600
     4.2 +++ b/xen/arch/x86/mm.c	Fri Jun 16 10:18:54 2006 -0600
     4.3 @@ -2598,8 +2598,8 @@ int destroy_grant_host_mapping(
     4.4      return destroy_grant_va_mapping(addr, frame);
     4.5  }
     4.6  
     4.7 -int steal_page_for_grant_transfer(
     4.8 -    struct domain *d, struct page_info *page)
     4.9 +int steal_page(
    4.10 +    struct domain *d, struct page_info *page, unsigned int memflags)
    4.11  {
    4.12      u32 _d, _nd, x, y;
    4.13  
    4.14 @@ -2636,7 +2636,8 @@ int steal_page_for_grant_transfer(
    4.15       * Unlink from 'd'. At least one reference remains (now anonymous), so 
    4.16       * no one else is spinning to try to delete this page from 'd'.
    4.17       */
    4.18 -    d->tot_pages--;
    4.19 +    if ( !(memflags & MEMF_no_refcount) )
    4.20 +        d->tot_pages--;
    4.21      list_del(&page->list);
    4.22  
    4.23      spin_unlock(&d->page_alloc_lock);
     5.1 --- a/xen/arch/x86/shadow.c	Fri Jun 16 09:10:08 2006 -0600
     5.2 +++ b/xen/arch/x86/shadow.c	Fri Jun 16 10:18:54 2006 -0600
     5.3 @@ -292,10 +292,10 @@ alloc_shadow_page(struct domain *d,
     5.4  #elif CONFIG_PAGING_LEVELS >= 3
     5.5          if ( d->arch.ops->guest_paging_levels == PAGING_L2 &&
     5.6               psh_type == PGT_l4_shadow )      /* allocated for PAE PDP page */
     5.7 -            page = alloc_domheap_pages(NULL, 0, ALLOC_DOM_DMA);
     5.8 +            page = alloc_domheap_pages(NULL, 0, MEMF_dma);
     5.9          else if ( d->arch.ops->guest_paging_levels == PAGING_L3 &&
    5.10                    (psh_type == PGT_l3_shadow || psh_type == PGT_l4_shadow) )
    5.11 -            page = alloc_domheap_pages(NULL, 0, ALLOC_DOM_DMA); /* allocated for PAE PDP page */
    5.12 +            page = alloc_domheap_pages(NULL, 0, MEMF_dma); /* allocated for PAE PDP page */
    5.13          else
    5.14              page = alloc_domheap_page(NULL);
    5.15  #endif
     6.1 --- a/xen/arch/x86/shadow_public.c	Fri Jun 16 09:10:08 2006 -0600
     6.2 +++ b/xen/arch/x86/shadow_public.c	Fri Jun 16 10:18:54 2006 -0600
     6.3 @@ -43,7 +43,7 @@ int shadow_direct_map_init(struct domain
     6.4      struct page_info *page;
     6.5      l3_pgentry_t *root;
     6.6  
     6.7 -    if ( !(page = alloc_domheap_pages(NULL, 0, ALLOC_DOM_DMA)) )
     6.8 +    if ( !(page = alloc_domheap_pages(NULL, 0, MEMF_dma)) )
     6.9          return 0;
    6.10  
    6.11      root = map_domain_page(page_to_mfn(page));
    6.12 @@ -395,7 +395,7 @@ static void alloc_monitor_pagetable(stru
    6.13  
    6.14      ASSERT(!pagetable_get_paddr(v->arch.monitor_table)); /* we should only get called once */
    6.15  
    6.16 -    m3mfn_info = alloc_domheap_pages(NULL, 0, ALLOC_DOM_DMA);
    6.17 +    m3mfn_info = alloc_domheap_pages(NULL, 0, MEMF_dma);
    6.18      ASSERT( m3mfn_info );
    6.19  
    6.20      m3mfn = page_to_mfn(m3mfn_info);
     7.1 --- a/xen/common/grant_table.c	Fri Jun 16 09:10:08 2006 -0600
     7.2 +++ b/xen/common/grant_table.c	Fri Jun 16 10:18:54 2006 -0600
     7.3 @@ -634,7 +634,7 @@ gnttab_transfer(
     7.4              goto copyback;
     7.5          }
     7.6  
     7.7 -        if ( steal_page_for_grant_transfer(d, page) < 0 )
     7.8 +        if ( steal_page(d, page, 0) < 0 )
     7.9          {
    7.10              gop.status = GNTST_bad_page;
    7.11              goto copyback;
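
Note that the grant-transfer caller passes memflags of 0, so steal_page() keeps its old accounting behaviour here and still decrements the victim domain's tot_pages; only the new exchange path (memory.c, below) opts out via MEMF_no_refcount.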
     8.1 --- a/xen/common/memory.c	Fri Jun 16 09:10:08 2006 -0600
     8.2 +++ b/xen/common/memory.c	Fri Jun 16 10:18:54 2006 -0600
     8.3 @@ -34,7 +34,7 @@ increase_reservation(
     8.4      XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
     8.5      unsigned int   nr_extents,
     8.6      unsigned int   extent_order,
     8.7 -    unsigned int   flags,
     8.8 +    unsigned int   memflags,
     8.9      int           *preempted)
    8.10  {
    8.11      struct page_info *page;
    8.12 @@ -58,11 +58,11 @@ increase_reservation(
    8.13          }
    8.14  
    8.15          if ( unlikely((page = alloc_domheap_pages(
    8.16 -            d, extent_order, flags)) == NULL) )
    8.17 +            d, extent_order, memflags)) == NULL) )
    8.18          {
    8.19              DPRINTK("Could not allocate order=%d extent: "
    8.20 -                    "id=%d flags=%x (%ld of %d)\n",
    8.21 -                    extent_order, d->domain_id, flags, i, nr_extents);
    8.22 +                    "id=%d memflags=%x (%ld of %d)\n",
    8.23 +                    extent_order, d->domain_id, memflags, i, nr_extents);
    8.24              return i;
    8.25          }
    8.26  
    8.27 @@ -84,7 +84,7 @@ populate_physmap(
    8.28      XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
    8.29      unsigned int  nr_extents,
    8.30      unsigned int  extent_order,
    8.31 -    unsigned int  flags,
    8.32 +    unsigned int  memflags,
    8.33      int          *preempted)
    8.34  {
    8.35      struct page_info *page;
    8.36 @@ -111,11 +111,11 @@ populate_physmap(
    8.37              goto out;
    8.38  
    8.39          if ( unlikely((page = alloc_domheap_pages(
    8.40 -            d, extent_order, flags)) == NULL) )
    8.41 +            d, extent_order, memflags)) == NULL) )
    8.42          {
    8.43              DPRINTK("Could not allocate order=%d extent: "
    8.44 -                    "id=%d flags=%x (%ld of %d)\n",
    8.45 -                    extent_order, d->domain_id, flags, i, nr_extents);
    8.46 +                    "id=%d memflags=%x (%ld of %d)\n",
    8.47 +                    extent_order, d->domain_id, memflags, i, nr_extents);
    8.48              goto out;
    8.49          }
    8.50  
    8.51 @@ -183,7 +183,6 @@ decrease_reservation(
    8.52      XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
    8.53      unsigned int   nr_extents,
    8.54      unsigned int   extent_order,
    8.55 -    unsigned int   flags,
    8.56      int           *preempted)
    8.57  {
    8.58      unsigned long i, j;
    8.59 @@ -276,10 +275,234 @@ translate_gpfn_list(
    8.60      return 0;
    8.61  }
    8.62  
    8.63 +static long
    8.64 +memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
    8.65 +{
    8.66 +    struct xen_memory_exchange exch;
    8.67 +    LIST_HEAD(in_chunk_list);
    8.68 +    LIST_HEAD(out_chunk_list);
    8.69 +    unsigned long in_chunk_order, out_chunk_order;
    8.70 +    unsigned long gpfn, gmfn, mfn;
    8.71 +    unsigned long i, j, k;
    8.72 +    unsigned int  memflags = 0;
    8.73 +    long          rc = 0;
    8.74 +    struct domain *d;
    8.75 +    struct page_info *page;
    8.76 +
    8.77 +    if ( copy_from_guest(&exch, arg, 1) )
    8.78 +        return -EFAULT;
    8.79 +
    8.80 +    /* Various sanity checks. */
    8.81 +    if ( (exch.nr_exchanged > exch.in.nr_extents) ||
    8.82 +         /* Input and output domain identifiers match? */
    8.83 +         (exch.in.domid != exch.out.domid) ||
    8.84 +         /* Sizes of input and output lists do not overflow a long? */
    8.85 +         ((~0UL >> exch.in.extent_order) < exch.in.nr_extents) ||
    8.86 +         ((~0UL >> exch.out.extent_order) < exch.out.nr_extents) ||
    8.87 +         /* Sizes of input and output lists match? */
    8.88 +         ((exch.in.nr_extents << exch.in.extent_order) !=
    8.89 +          (exch.out.nr_extents << exch.out.extent_order)) )
    8.90 +    {
    8.91 +        rc = -EINVAL;
    8.92 +        goto fail_early;
    8.93 +    }
    8.94 +
    8.95 +    /* Only privileged guests can allocate multi-page contiguous extents. */
    8.96 +    if ( ((exch.in.extent_order != 0) || (exch.out.extent_order != 0)) &&
    8.97 +         !multipage_allocation_permitted(current->domain) )
    8.98 +    {
    8.99 +        rc = -EPERM;
   8.100 +        goto fail_early;
   8.101 +    }
   8.102 +
   8.103 +    if ( (exch.out.address_bits != 0) &&
   8.104 +         (exch.out.address_bits <
   8.105 +          (get_order_from_pages(max_page) + PAGE_SHIFT)) )
   8.106 +    {
   8.107 +        if ( exch.out.address_bits < 31 )
   8.108 +        {
   8.109 +            rc = -ENOMEM;
   8.110 +            goto fail_early;
   8.111 +        }
   8.112 +        memflags = MEMF_dma;
   8.113 +    }
   8.114 +
   8.115 +    guest_handle_add_offset(exch.in.extent_start, exch.nr_exchanged);
   8.116 +    exch.in.nr_extents -= exch.nr_exchanged;
   8.117 +
   8.118 +    if ( exch.in.extent_order <= exch.out.extent_order )
   8.119 +    {
   8.120 +        in_chunk_order  = exch.out.extent_order - exch.in.extent_order;
   8.121 +        out_chunk_order = 0;
   8.122 +        guest_handle_add_offset(
   8.123 +            exch.out.extent_start, exch.nr_exchanged >> in_chunk_order);
   8.124 +        exch.out.nr_extents -= exch.nr_exchanged >> in_chunk_order;
   8.125 +    }
   8.126 +    else
   8.127 +    {
   8.128 +        in_chunk_order  = 0;
   8.129 +        out_chunk_order = exch.in.extent_order - exch.out.extent_order;
   8.130 +        guest_handle_add_offset(
   8.131 +            exch.out.extent_start, exch.nr_exchanged << out_chunk_order);
   8.132 +        exch.out.nr_extents -= exch.nr_exchanged << out_chunk_order;
   8.133 +    }
   8.134 +
   8.135 +    /*
   8.136 +     * Only support exchange on calling domain right now. Otherwise there are
   8.137 +     * tricky corner cases to consider (e.g., DOMF_dying domain).
   8.138 +     */
   8.139 +    if ( unlikely(exch.in.domid != DOMID_SELF) )
   8.140 +    {
   8.141 +        rc = IS_PRIV(current->domain) ? -EINVAL : -EPERM;
   8.142 +        goto fail_early;
   8.143 +    }
   8.144 +    d = current->domain;
   8.145 +
   8.146 +    for ( i = 0; i < (exch.in.nr_extents >> in_chunk_order); i++ )
   8.147 +    {
   8.148 +        if ( hypercall_preempt_check() )
   8.149 +        {
   8.150 +            exch.nr_exchanged += i << in_chunk_order;
   8.151 +            if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
   8.152 +                return -EFAULT;
   8.153 +            return hypercall_create_continuation(
   8.154 +                __HYPERVISOR_memory_op, "lh", XENMEM_exchange, arg);
   8.155 +        }
   8.156 +
   8.157 +        /* Steal a chunk's worth of input pages from the domain. */
   8.158 +        for ( j = 0; j < (1UL << in_chunk_order); j++ )
   8.159 +        {
   8.160 +            if ( unlikely(__copy_from_guest_offset(
   8.161 +                &gmfn, exch.in.extent_start, (i<<in_chunk_order)+j, 1)) )
   8.162 +            {
   8.163 +                rc = -EFAULT;
   8.164 +                goto fail;
   8.165 +            }
   8.166 +
   8.167 +            for ( k = 0; k < (1UL << exch.in.extent_order); k++ )
   8.168 +            {
   8.169 +                mfn = gmfn_to_mfn(d, gmfn + k);
   8.170 +                if ( unlikely(!mfn_valid(mfn)) )
   8.171 +                {
   8.172 +                    rc = -EINVAL;
   8.173 +                    goto fail;
   8.174 +                }
   8.175 +
   8.176 +                page = mfn_to_page(mfn);
   8.177 +
   8.178 +                if ( unlikely(steal_page(d, page, MEMF_no_refcount)) )
   8.179 +                {
   8.180 +                    rc = -EINVAL;
   8.181 +                    goto fail;
   8.182 +                }
   8.183 +
   8.184 +                list_add(&page->list, &in_chunk_list);
   8.185 +            }
   8.186 +        }
   8.187 +
   8.188 +        /* Allocate a chunk's worth of anonymous output pages. */
   8.189 +        for ( j = 0; j < (1UL << out_chunk_order); j++ )
   8.190 +        {
   8.191 +            page = alloc_domheap_pages(
   8.192 +                NULL, exch.out.extent_order, memflags);
   8.193 +            if ( unlikely(page == NULL) )
   8.194 +            {
   8.195 +                rc = -ENOMEM;
   8.196 +                goto fail;
   8.197 +            }
   8.198 +
   8.199 +            list_add(&page->list, &out_chunk_list);
   8.200 +        }
   8.201 +
   8.202 +        /*
   8.203 +         * Success! Beyond this point we cannot fail for this chunk.
   8.204 +         */
   8.205 +
   8.206 +        /* Destroy final reference to each input page. */
   8.207 +        while ( !list_empty(&in_chunk_list) )
   8.208 +        {
   8.209 +            page = list_entry(in_chunk_list.next, struct page_info, list);
   8.210 +            list_del(&page->list);
   8.211 +            if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
   8.212 +                BUG();
   8.213 +            mfn = page_to_mfn(page);
   8.214 +            guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn);
   8.215 +            put_page(page);
   8.216 +        }
   8.217 +
   8.218 +        /* Assign each output page to the domain. */
   8.219 +        j = 0;
   8.220 +        while ( !list_empty(&out_chunk_list) )
   8.221 +        {
   8.222 +            page = list_entry(out_chunk_list.next, struct page_info, list);
   8.223 +            list_del(&page->list);
   8.224 +            if ( assign_pages(d, page, exch.out.extent_order,
   8.225 +                              MEMF_no_refcount) )
   8.226 +                BUG();
   8.227 +
   8.228 +            /* Note that we ignore errors accessing the output extent list. */
   8.229 +            (void)__copy_from_guest_offset(
   8.230 +                &gpfn, exch.out.extent_start, (i<<out_chunk_order)+j, 1);
   8.231 +
   8.232 +            mfn = page_to_mfn(page);
   8.233 +            if ( unlikely(shadow_mode_translate(d)) )
   8.234 +            {
   8.235 +                for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
   8.236 +                    guest_physmap_add_page(d, gpfn + k, mfn + k);
   8.237 +            }
   8.238 +            else
   8.239 +            {
   8.240 +                for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
   8.241 +                    set_gpfn_from_mfn(mfn + k, gpfn + k);
   8.242 +                (void)__copy_to_guest_offset(
   8.243 +                    exch.out.extent_start, (i<<out_chunk_order)+j, &mfn, 1);
   8.244 +            }
   8.245 +
   8.246 +            j++;
   8.247 +        }
   8.248 +        BUG_ON(j != (1UL << out_chunk_order));
   8.249 +    }
   8.250 +
   8.251 +    exch.nr_exchanged += exch.in.nr_extents;
   8.252 +    if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
   8.253 +        rc = -EFAULT;
   8.254 +    return rc;
   8.255 +
   8.256 +    /*
   8.257 +     * Failed a chunk! Free any partial chunk work. Tell caller how many
   8.258 +     * chunks succeeded.
   8.259 +     */
   8.260 + fail:
   8.261 +    /* Reassign any input pages we managed to steal. */
   8.262 +    while ( !list_empty(&in_chunk_list) )
   8.263 +    {
   8.264 +        page = list_entry(in_chunk_list.next, struct page_info, list);
   8.265 +        list_del(&page->list);
   8.266 +        if ( assign_pages(d, page, 0, MEMF_no_refcount) )
   8.267 +            BUG();
   8.268 +    }
   8.269 +
   8.270 +    /* Free any output pages we managed to allocate. */
   8.271 +    while ( !list_empty(&out_chunk_list) )
   8.272 +    {
   8.273 +        page = list_entry(out_chunk_list.next, struct page_info, list);
   8.274 +        list_del(&page->list);
   8.275 +        free_domheap_pages(page, exch.out.extent_order);
   8.276 +    }
   8.277 +
   8.278 +    exch.nr_exchanged += i << in_chunk_order;
   8.279 +
   8.280 + fail_early:
   8.281 +    if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
   8.282 +        rc = -EFAULT;
   8.283 +    return rc;
   8.284 +}
   8.285 +
   8.286  long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg)
   8.287  {
   8.288      struct domain *d;
   8.289 -    int rc, op, flags = 0, preempted = 0;
   8.290 +    int rc, op, preempted = 0;
   8.291 +    unsigned int memflags = 0;
   8.292      unsigned long start_extent, progress;
   8.293      struct xen_memory_reservation reservation;
   8.294      domid_t domid;
   8.295 @@ -291,16 +514,17 @@ long do_memory_op(unsigned long cmd, XEN
   8.296      case XENMEM_increase_reservation:
   8.297      case XENMEM_decrease_reservation:
   8.298      case XENMEM_populate_physmap:
   8.299 +        start_extent = cmd >> START_EXTENT_SHIFT;
   8.300 +
   8.301          if ( copy_from_guest(&reservation, arg, 1) )
   8.302 -            return -EFAULT;
   8.303 +            return start_extent;
   8.304  
   8.305          /* Is size too large for us to encode a continuation? */
   8.306          if ( reservation.nr_extents > (ULONG_MAX >> START_EXTENT_SHIFT) )
   8.307 -            return -EINVAL;
   8.308 +            return start_extent;
   8.309  
   8.310 -        start_extent = cmd >> START_EXTENT_SHIFT;
   8.311          if ( unlikely(start_extent > reservation.nr_extents) )
   8.312 -            return -EINVAL;
   8.313 +            return start_extent;
   8.314  
   8.315          if ( !guest_handle_is_null(reservation.extent_start) )
   8.316              guest_handle_add_offset(reservation.extent_start, start_extent);
   8.317 @@ -311,16 +535,15 @@ long do_memory_op(unsigned long cmd, XEN
   8.318                (get_order_from_pages(max_page) + PAGE_SHIFT)) )
   8.319          {
   8.320              if ( reservation.address_bits < 31 )
   8.321 -                return -ENOMEM;
   8.322 -            flags = ALLOC_DOM_DMA;
   8.323 +                return start_extent;
   8.324 +            memflags = MEMF_dma;
   8.325          }
   8.326  
   8.327          if ( likely(reservation.domid == DOMID_SELF) )
   8.328              d = current->domain;
   8.329 -        else if ( !IS_PRIV(current->domain) )
   8.330 -            return -EPERM;
   8.331 -        else if ( (d = find_domain_by_id(reservation.domid)) == NULL )
   8.332 -            return -ESRCH;
   8.333 +        else if ( !IS_PRIV(current->domain) ||
   8.334 +                  ((d = find_domain_by_id(reservation.domid)) == NULL) )
   8.335 +            return start_extent;
   8.336  
   8.337          switch ( op )
   8.338          {
   8.339 @@ -330,7 +553,7 @@ long do_memory_op(unsigned long cmd, XEN
   8.340                  reservation.extent_start,
   8.341                  reservation.nr_extents,
   8.342                  reservation.extent_order,
   8.343 -                flags,
   8.344 +                memflags,
   8.345                  &preempted);
   8.346              break;
   8.347          case XENMEM_decrease_reservation:
   8.348 @@ -339,7 +562,6 @@ long do_memory_op(unsigned long cmd, XEN
   8.349                  reservation.extent_start,
   8.350                  reservation.nr_extents,
   8.351                  reservation.extent_order,
   8.352 -                flags,
   8.353                  &preempted);
   8.354              break;
   8.355          case XENMEM_populate_physmap:
   8.356 @@ -349,7 +571,7 @@ long do_memory_op(unsigned long cmd, XEN
   8.357                  reservation.extent_start,
   8.358                  reservation.nr_extents,
   8.359                  reservation.extent_order,
   8.360 -                flags,
   8.361 +                memflags,
   8.362                  &preempted);
   8.363              break;
   8.364          }
   8.365 @@ -366,6 +588,10 @@ long do_memory_op(unsigned long cmd, XEN
   8.366  
   8.367          break;
   8.368  
   8.369 +    case XENMEM_exchange:
   8.370 +        rc = memory_exchange(guest_handle_cast(arg, xen_memory_exchange_t));
   8.371 +        break;
   8.372 +
   8.373      case XENMEM_maximum_ram_page:
   8.374          rc = max_page;
   8.375          break;
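
Two details of the memory.c changes above are worth spelling out. First, the chunking arithmetic in memory_exchange(): to trade 512 order-0 pages for a single order-9 extent, in.extent_order (0) is <= out.extent_order (9), so in_chunk_order = 9 and out_chunk_order = 0; the outer loop runs 512 >> 9 = 1 time, each iteration stealing 1 << 9 = 512 single-page input extents and allocating 1 << 0 = 1 order-9 output extent. Every chunk therefore frees and allocates exactly the same number of pages, which is what lets a preemption between chunks leave nr_exchanged describing a consistent prefix of completed work.

Second, the reservation ops in do_memory_op() now return start_extent on their early-exit paths instead of -EFAULT/-EINVAL/-ENOMEM/-EPERM/-ESRCH. Progress is encoded in the cmd argument across continuations (cmd >> START_EXTENT_SHIFT), and the public interface (see the public/memory.h hunk below) documents these commands as returning the number of extents successfully processed, so reporting the current extent count is the consistent failure behaviour.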
     9.1 --- a/xen/common/page_alloc.c	Fri Jun 16 09:10:08 2006 -0600
     9.2 +++ b/xen/common/page_alloc.c	Fri Jun 16 10:18:54 2006 -0600
     9.3 @@ -531,16 +531,66 @@ void init_domheap_pages(paddr_t ps, padd
     9.4  }
     9.5  
     9.6  
     9.7 +int assign_pages(
     9.8 +    struct domain *d,
     9.9 +    struct page_info *pg,
    9.10 +    unsigned int order,
    9.11 +    unsigned int memflags)
    9.12 +{
    9.13 +    unsigned long i;
    9.14 +
    9.15 +    spin_lock(&d->page_alloc_lock);
    9.16 +
    9.17 +    if ( unlikely(test_bit(_DOMF_dying, &d->domain_flags)) )
    9.18 +    {
    9.19 +        DPRINTK("Cannot assign page to domain%d -- dying.\n", d->domain_id);
    9.20 +        goto fail;
    9.21 +    }
    9.22 +
    9.23 +    if ( !(memflags & MEMF_no_refcount) )
    9.24 +    {
    9.25 +        if ( unlikely((d->tot_pages + (1 << order)) > d->max_pages) )
    9.26 +        {
    9.27 +            DPRINTK("Over-allocation for domain %u: %u > %u\n",
    9.28 +                    d->domain_id, d->tot_pages + (1 << order), d->max_pages);
    9.29 +            goto fail;
    9.30 +        }
    9.31 +
    9.32 +        if ( unlikely(d->tot_pages == 0) )
    9.33 +            get_knownalive_domain(d);
    9.34 +
    9.35 +        d->tot_pages += 1 << order;
    9.36 +    }
    9.37 +
    9.38 +    for ( i = 0; i < (1 << order); i++ )
    9.39 +    {
    9.40 +        ASSERT(page_get_owner(&pg[i]) == NULL);
    9.41 +        ASSERT((pg[i].count_info & ~(PGC_allocated | 1)) == 0);
    9.42 +        page_set_owner(&pg[i], d);
    9.43 +        wmb(); /* Domain pointer must be visible before updating refcnt. */
    9.44 +        pg[i].count_info = PGC_allocated | 1;
    9.45 +        list_add_tail(&pg[i].list, &d->page_list);
    9.46 +    }
    9.47 +
    9.48 +    spin_unlock(&d->page_alloc_lock);
    9.49 +    return 0;
    9.50 +
    9.51 + fail:
    9.52 +    spin_unlock(&d->page_alloc_lock);
    9.53 +    return -1;
    9.54 +}
    9.55 +
    9.56 +
    9.57  struct page_info *alloc_domheap_pages(
    9.58 -    struct domain *d, unsigned int order, unsigned int flags)
    9.59 +    struct domain *d, unsigned int order, unsigned int memflags)
    9.60  {
    9.61      struct page_info *pg = NULL;
    9.62      cpumask_t mask;
    9.63 -    int i;
    9.64 +    unsigned long i;
    9.65  
    9.66      ASSERT(!in_irq());
    9.67  
    9.68 -    if ( !(flags & ALLOC_DOM_DMA) )
    9.69 +    if ( !(memflags & MEMF_dma) )
    9.70      {
    9.71          pg = alloc_heap_pages(MEMZONE_DOM, order);
    9.72          /* Failure? Then check if we can fall back to the DMA pool. */
    9.73 @@ -582,37 +632,11 @@ struct page_info *alloc_domheap_pages(
    9.74          flush_tlb_mask(mask);
    9.75      }
    9.76  
    9.77 -    if ( d == NULL )
    9.78 -        return pg;
    9.79 -
    9.80 -    spin_lock(&d->page_alloc_lock);
    9.81 -
    9.82 -    if ( unlikely(test_bit(_DOMF_dying, &d->domain_flags)) ||
    9.83 -         unlikely((d->tot_pages + (1 << order)) > d->max_pages) )
    9.84 +    if ( (d != NULL) && assign_pages(d, pg, order, memflags) )
    9.85      {
    9.86 -        DPRINTK("Over-allocation for domain %u: %u > %u\n",
    9.87 -                d->domain_id, d->tot_pages + (1 << order), d->max_pages);
    9.88 -        DPRINTK("...or the domain is dying (%d)\n", 
    9.89 -                !!test_bit(_DOMF_dying, &d->domain_flags));
    9.90 -        spin_unlock(&d->page_alloc_lock);
    9.91          free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
    9.92          return NULL;
    9.93      }
    9.94 -
    9.95 -    if ( unlikely(d->tot_pages == 0) )
    9.96 -        get_knownalive_domain(d);
    9.97 -
    9.98 -    d->tot_pages += 1 << order;
    9.99 -
   9.100 -    for ( i = 0; i < (1 << order); i++ )
   9.101 -    {
   9.102 -        page_set_owner(&pg[i], d);
   9.103 -        wmb(); /* Domain pointer must be visible before updating refcnt. */
   9.104 -        pg[i].count_info |= PGC_allocated | 1;
   9.105 -        list_add_tail(&pg[i].list, &d->page_list);
   9.106 -    }
   9.107 -
   9.108 -    spin_unlock(&d->page_alloc_lock);
   9.109      
   9.110      return pg;
   9.111  }
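
Factoring assign_pages() out of alloc_domheap_pages() is what enables the exchange path: memory_exchange() allocates its output pages anonymously (d == NULL) and only later attaches them to the domain via assign_pages(..., MEMF_no_refcount). The refactoring also tightens the invariants slightly: where the old inline loop did pg[i].count_info |= PGC_allocated | 1, assign_pages() first asserts that each page is unowned with no stray count bits, then sets count_info = PGC_allocated | 1 outright.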
    10.1 --- a/xen/include/asm-ia64/grant_table.h	Fri Jun 16 09:10:08 2006 -0600
    10.2 +++ b/xen/include/asm-ia64/grant_table.h	Fri Jun 16 10:18:54 2006 -0600
    10.3 @@ -12,16 +12,12 @@
    10.4  #define create_grant_host_mapping(a, f, fl)  0
    10.5  #define destroy_grant_host_mapping(a, f, fl) 0
    10.6  
    10.7 -// for grant transfer
    10.8 -#define steal_page_for_grant_transfer(d, p)  0
    10.9 -
   10.10  #else
   10.11  // for grant map/unmap
   10.12  int create_grant_host_mapping(unsigned long gpaddr, unsigned long mfn, unsigned int flags);
   10.13  int destroy_grant_host_mapping(unsigned long gpaddr, unsigned long mfn, unsigned int flags);
   10.14  
   10.15  // for grant transfer
   10.16 -int steal_page_for_grant_transfer(struct domain *d, struct page_info *page);
   10.17  void guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
   10.18  
   10.19  #endif
    11.1 --- a/xen/include/asm-ia64/mm.h	Fri Jun 16 09:10:08 2006 -0600
    11.2 +++ b/xen/include/asm-ia64/mm.h	Fri Jun 16 10:18:54 2006 -0600
    11.3 @@ -496,4 +496,11 @@ extern u64 translate_domain_pte(u64 ptev
    11.4  /* Arch-specific portion of memory_op hypercall. */
    11.5  #define arch_memory_op(op, arg) (-ENOSYS)
    11.6  
    11.7 +#ifndef CONFIG_XEN_IA64_DOM0_VP
    11.8 +#define steal_page(d, p, f)  0
    11.9 +#else
   11.10 +int steal_page(
   11.11 +    struct domain *d, struct page_info *page, unsigned int memflags);
   11.12 +#endif
   11.13 +
   11.14  #endif /* __ASM_IA64_MM_H__ */
    12.1 --- a/xen/include/asm-x86/grant_table.h	Fri Jun 16 09:10:08 2006 -0600
    12.2 +++ b/xen/include/asm-x86/grant_table.h	Fri Jun 16 10:18:54 2006 -0600
    12.3 @@ -18,9 +18,6 @@ int create_grant_host_mapping(
    12.4  int destroy_grant_host_mapping(
    12.5      unsigned long addr, unsigned long frame, unsigned int flags);
    12.6  
    12.7 -int steal_page_for_grant_transfer(
    12.8 -    struct domain *d, struct page_info *page);
    12.9 -
   12.10  #define gnttab_create_shared_page(d, t, i)                               \
   12.11      do {                                                                 \
   12.12          share_xen_page_with_guest(                                       \
    13.1 --- a/xen/include/asm-x86/mm.h	Fri Jun 16 09:10:08 2006 -0600
    13.2 +++ b/xen/include/asm-x86/mm.h	Fri Jun 16 10:18:54 2006 -0600
    13.3 @@ -389,4 +389,7 @@ int __sync_lazy_execstate(void);
    13.4  long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
    13.5  long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
    13.6  
    13.7 +int steal_page(
    13.8 +    struct domain *d, struct page_info *page, unsigned int memflags);
    13.9 +
   13.10  #endif /* __ASM_X86_MM_H__ */
    14.1 --- a/xen/include/public/memory.h	Fri Jun 16 09:10:08 2006 -0600
    14.2 +++ b/xen/include/public/memory.h	Fri Jun 16 10:18:54 2006 -0600
    14.3 @@ -10,8 +10,8 @@
    14.4  #define __XEN_PUBLIC_MEMORY_H__
    14.5  
    14.6  /*
    14.7 - * Increase or decrease the specified domain's memory reservation. Returns a
    14.8 - * -ve errcode on failure, or the # extents successfully allocated or freed.
    14.9 + * Increase or decrease the specified domain's memory reservation. Returns the
   14.10 + * number of extents successfully allocated or freed.
   14.11   * arg == addr of struct xen_memory_reservation.
   14.12   */
   14.13  #define XENMEM_increase_reservation 0
   14.14 @@ -48,12 +48,54 @@ struct xen_memory_reservation {
   14.15       * Unprivileged domains can specify only DOMID_SELF.
   14.16       */
   14.17      domid_t        domid;
   14.18 -
   14.19  };
   14.20  typedef struct xen_memory_reservation xen_memory_reservation_t;
   14.21  DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t);
   14.22  
   14.23  /*
   14.24 + * An atomic exchange of memory pages. If return code is zero then
   14.25 + * @out.extent_list provides GMFNs of the newly-allocated memory.
   14.26 + * Returns zero on complete success, otherwise a negative error code.
   14.27 + * On complete success, @nr_exchanged == @in.nr_extents.
   14.28 + * On partial success @nr_exchanged indicates how much work was done.
   14.29 + */
   14.30 +#define XENMEM_exchange             11
   14.31 +struct xen_memory_exchange {
   14.32 +    /*
   14.33 +     * [IN] Details of memory extents to be exchanged (GMFN bases).
   14.34 +     * Note that @in.address_bits is ignored and unused.
   14.35 +     */
   14.36 +    struct xen_memory_reservation in;
   14.37 +
   14.38 +    /*
   14.39 +     * [IN/OUT] Details of new memory extents.
   14.40 +     * We require that:
   14.41 +     *  1. @in.domid == @out.domid
   14.42 +     *  2. @in.nr_extents  << @in.extent_order == 
   14.43 +     *     @out.nr_extents << @out.extent_order
   14.44 +     *  3. @in.extent_start and @out.extent_start lists must not overlap
   14.45 +     *  4. @out.extent_start lists GPFN bases to be populated
   14.46 +     *  5. @out.extent_start is overwritten with allocated GMFN bases
   14.47 +     */
   14.48 +    struct xen_memory_reservation out;
   14.49 +
   14.50 +    /*
   14.51 +     * [OUT] Number of input extents that were successfully exchanged:
   14.52 +     *  1. The first @nr_exchanged input extents were successfully
   14.53 +     *     deallocated.
   14.54 +     *  2. The corresponding first entries in the output extent list correctly
   14.55 +     *     indicate the GMFNs that were successfully exchanged.
   14.56 +     *  3. All other input and output extents are untouched.
   14.57 +     *  4. If not all input extents are exchanged then the return code of this
   14.58 +     *     command will be non-zero.
   14.59 +     *  5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER!
   14.60 +     */
   14.61 +    unsigned long nr_exchanged;
   14.62 +};
   14.63 +typedef struct xen_memory_exchange xen_memory_exchange_t;
   14.64 +DEFINE_XEN_GUEST_HANDLE(xen_memory_exchange_t);
   14.65 +
   14.66 +/*
   14.67   * Returns the maximum machine frame number of mapped RAM in this system.
   14.68   * This command always succeeds (it never returns an error code).
   14.69   * arg == NULL.
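
As a usage illustration, here is a hedged guest-side sketch of the new hypercall, assuming the guest's standard HYPERVISOR_memory_op() wrapper and set_xen_guest_handle() helper; the function name and buffers are illustrative, not part of this changeset.

    /* Illustrative guest-side call: trade 512 scattered order-0 pages for
     * one machine-contiguous order-9 extent below 4GB. The totals match
     * (512 << 0 == 1 << 9) and memset() zeroes nr_exchanged, as the
     * interface requires. On entry *out_gmfn holds the GPFN base to
     * populate; on success it is overwritten with the allocated GMFN base. */
    static long exchange_for_contig(xen_pfn_t gmfns[512], xen_pfn_t *out_gmfn)
    {
        struct xen_memory_exchange exch;

        memset(&exch, 0, sizeof(exch));
        set_xen_guest_handle(exch.in.extent_start, gmfns);
        exch.in.nr_extents    = 512;
        exch.in.extent_order  = 0;
        exch.in.domid         = DOMID_SELF;
        set_xen_guest_handle(exch.out.extent_start, out_gmfn);
        exch.out.nr_extents   = 1;
        exch.out.extent_order = 9;
        exch.out.address_bits = 32;         /* result must fit below 4GB */
        exch.out.domid        = DOMID_SELF; /* must equal in.domid */

        /* Nonzero return means partial success at best; exch.nr_exchanged
         * reports how many input extents were actually exchanged. */
        return HYPERVISOR_memory_op(XENMEM_exchange, &exch);
    }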
    15.1 --- a/xen/include/xen/mm.h	Fri Jun 16 09:10:08 2006 -0600
    15.2 +++ b/xen/include/xen/mm.h	Fri Jun 16 10:18:54 2006 -0600
    15.3 @@ -60,13 +60,23 @@ void free_xenheap_pages(void *v, unsigne
    15.4  /* Domain suballocator. These functions are *not* interrupt-safe.*/
    15.5  void init_domheap_pages(paddr_t ps, paddr_t pe);
    15.6  struct page_info *alloc_domheap_pages(
    15.7 -    struct domain *d, unsigned int order, unsigned int flags);
    15.8 +    struct domain *d, unsigned int order, unsigned int memflags);
    15.9  void free_domheap_pages(struct page_info *pg, unsigned int order);
   15.10  unsigned long avail_domheap_pages(void);
   15.11  #define alloc_domheap_page(d) (alloc_domheap_pages(d,0,0))
   15.12  #define free_domheap_page(p)  (free_domheap_pages(p,0))
   15.13  
   15.14 -#define ALLOC_DOM_DMA 1
   15.15 +int assign_pages(
   15.16 +    struct domain *d,
   15.17 +    struct page_info *pg,
   15.18 +    unsigned int order,
   15.19 +    unsigned int memflags);
   15.20 +
   15.21 +/* memflags: */
   15.22 +#define _MEMF_dma         0
   15.23 +#define  MEMF_dma         (1U<<_MEMF_dma)
   15.24 +#define _MEMF_no_refcount 1
   15.25 +#define  MEMF_no_refcount (1U<<_MEMF_no_refcount)
   15.26  
   15.27  #ifdef CONFIG_PAGEALLOC_MAX_ORDER
   15.28  #define MAX_ORDER CONFIG_PAGEALLOC_MAX_ORDER
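
The new flag definitions follow Xen's usual pairing of a bit number (_MEMF_x) with its mask (MEMF_x), and MEMF_dma keeps the old ALLOC_DOM_DMA value of 1, so that renaming is purely mechanical; MEMF_no_refcount is the only genuinely new bit. An illustrative snippet (not from the changeset) of how the two forms relate:

    /* Illustrative only: each mask is its bit number shifted into place,
     * so flags compose with '|' and test with '&'. */
    unsigned int memflags = MEMF_dma | MEMF_no_refcount;    /* == 0x3 */
    ASSERT( MEMF_dma         == (1U << _MEMF_dma) );
    ASSERT( MEMF_no_refcount == (1U << _MEMF_no_refcount) );
    ASSERT( (memflags & MEMF_no_refcount) != 0 );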