ia64/xen-unstable

annotate xen/common/memory.c @ 11973:041507e2754c

[XEN] Make memory hypercalls NUMA-aware.

This patch modifies memory ops to use the NUMA-aware page allocator
functions. We use the target domain's VCPU0 placement to determine
which node's memory to use. We expect the system administrator to
use the exposed NUMA topology information to craft guest config
files that are NUMA-friendly (requesting only processors and memory
that fit within a given node).
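
The pattern applied throughout the file is to derive a cpu locality hint
from the target domain's VCPU0 and pass it to the NUMA-aware allocator.
A minimal sketch of that pattern, assuming Xen-internal headers and types;
the helper name below is illustrative and not part of the patch:

    #include <xen/sched.h>   /* struct domain, struct vcpu */
    #include <xen/mm.h>      /* struct page_info, __alloc_domheap_pages() */

    /* Allocate one order-'extent_order' extent for domain d, preferring
     * memory local to the node on which d's VCPU0 currently runs. */
    static struct page_info *alloc_extent_near_vcpu0(struct domain *d,
                                                     unsigned int extent_order,
                                                     unsigned int memflags)
    {
        /* VCPU0's processor is the locality hint; the NUMA-aware
         * allocator maps this cpu to its home node. */
        unsigned int cpu = d->vcpu[0]->processor;
        return __alloc_domheap_pages(d, cpu, extent_order, memflags);
    }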

Signed-off-by: Ryan Harper <ryanh@us.ibm.com>
author kfraser@localhost.localdomain
date Wed Oct 25 12:30:08 2006 +0100 (2006-10-25)
parents 03fd2accb4d9
children 64100a77fd17
rev   line source
kaf24@6486 1 /******************************************************************************
kaf24@6486 2 * memory.c
kaf24@6486 3 *
kaf24@6486 4 * Code to handle memory-related requests.
kaf24@6486 5 *
kaf24@6486 6 * Copyright (c) 2003-2004, B Dragovic
kaf24@6486 7 * Copyright (c) 2003-2005, K A Fraser
kaf24@6486 8 */
kaf24@6486 9
kaf24@6486 10 #include <xen/config.h>
kaf24@6486 11 #include <xen/types.h>
kaf24@6486 12 #include <xen/lib.h>
kaf24@6486 13 #include <xen/mm.h>
kaf24@6486 14 #include <xen/perfc.h>
kaf24@6486 15 #include <xen/sched.h>
kaf24@6486 16 #include <xen/event.h>
kaf24@6486 17 #include <xen/shadow.h>
kaf24@8468 18 #include <xen/iocap.h>
kaf24@9068 19 #include <xen/guest_access.h>
kaf24@11219 20 #include <xen/errno.h>
kaf24@6486 21 #include <asm/current.h>
kaf24@6486 22 #include <asm/hardirq.h>
kaf24@6486 23 #include <public/memory.h>
kaf24@6486 24
kaf24@8871 25 /*
kaf24@8871 26 * To allow safe resume of do_memory_op() after preemption, we need to know
kaf24@8871 27 * at what point in the page list to resume. For this purpose I steal the
kaf24@8871 28 * high-order bits of the @cmd parameter, which are otherwise unused and zero.
kaf24@8871 29 */
kaf24@8871 30 #define START_EXTENT_SHIFT 4 /* cmd[:4] == start_extent */
kaf24@8871 31
kaf24@6486 32 static long
kaf24@6486 33 increase_reservation(
kaf24@6486 34 struct domain *d,
kaf24@10314 35 XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
kaf24@6486 36 unsigned int nr_extents,
kaf24@6486 37 unsigned int extent_order,
kfraser@10418 38 unsigned int memflags,
kaf24@6607 39 int *preempted)
kaf24@6486 40 {
kaf24@8726 41 struct page_info *page;
kaf24@10314 42 unsigned long i;
kaf24@10314 43 xen_pfn_t mfn;
kfraser@11973 44 /* Use domain's first processor for locality parameter. */
kfraser@11973 45 unsigned int cpu = d->vcpu[0]->processor;
kaf24@6486 46
kaf24@9068 47 if ( !guest_handle_is_null(extent_list) &&
kaf24@9068 48 !guest_handle_okay(extent_list, nr_extents) )
kaf24@6486 49 return 0;
kaf24@6486 50
kaf24@8468 51 if ( (extent_order != 0) &&
kaf24@8468 52 !multipage_allocation_permitted(current->domain) )
kaf24@6486 53 return 0;
kaf24@6486 54
kaf24@6486 55 for ( i = 0; i < nr_extents; i++ )
kaf24@6486 56 {
kaf24@6486 57 if ( hypercall_preempt_check() )
kaf24@6607 58 {
kaf24@6607 59 *preempted = 1;
kaf24@6486 60 return i;
kaf24@6607 61 }
kaf24@6486 62
kfraser@11973 63 if ( unlikely((page = __alloc_domheap_pages(d, cpu,
kfraser@11973 64 extent_order, memflags)) == NULL) )
kaf24@6486 65 {
kaf24@6752 66 DPRINTK("Could not allocate order=%d extent: "
kfraser@10418 67 "id=%d memflags=%x (%ld of %d)\n",
kfraser@10418 68 extent_order, d->domain_id, memflags, i, nr_extents);
kaf24@6486 69 return i;
kaf24@6486 70 }
kaf24@6486 71
kaf24@6486 72 /* Inform the domain of the new page's machine address. */
kaf24@9068 73 if ( !guest_handle_is_null(extent_list) )
kaf24@8859 74 {
kaf24@8859 75 mfn = page_to_mfn(page);
kaf24@9068 76 if ( unlikely(__copy_to_guest_offset(extent_list, i, &mfn, 1)) )
kaf24@8859 77 return i;
kaf24@8859 78 }
kaf24@6486 79 }
kaf24@6486 80
kaf24@6486 81 return nr_extents;
kaf24@6486 82 }
sos22@8688 83
kaf24@6486 84 static long
kaf24@8673 85 populate_physmap(
kaf24@8673 86 struct domain *d,
kaf24@10314 87 XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
kaf24@9068 88 unsigned int nr_extents,
kaf24@9068 89 unsigned int extent_order,
kfraser@10418 90 unsigned int memflags,
kaf24@9068 91 int *preempted)
kaf24@8673 92 {
kaf24@8726 93 struct page_info *page;
kaf24@10314 94 unsigned long i, j;
kaf24@10314 95 xen_pfn_t gpfn;
kaf24@10314 96 xen_pfn_t mfn;
kfraser@11973 97 /* Use domain's first processor for locality parameter. */
kfraser@11973 98 unsigned int cpu = d->vcpu[0]->processor;
kaf24@8673 99
kaf24@9068 100 if ( !guest_handle_okay(extent_list, nr_extents) )
kaf24@8673 101 return 0;
kaf24@8673 102
kaf24@8673 103 if ( (extent_order != 0) &&
kaf24@8673 104 !multipage_allocation_permitted(current->domain) )
kaf24@8673 105 return 0;
kaf24@8673 106
kaf24@8673 107 for ( i = 0; i < nr_extents; i++ )
kaf24@8673 108 {
kaf24@8673 109 if ( hypercall_preempt_check() )
kaf24@8673 110 {
kaf24@8673 111 *preempted = 1;
sos22@8688 112 goto out;
kaf24@8673 113 }
kaf24@8673 114
kaf24@9068 115 if ( unlikely(__copy_from_guest_offset(&gpfn, extent_list, i, 1)) )
kaf24@8859 116 goto out;
kaf24@8859 117
kfraser@11973 118 if ( unlikely((page = __alloc_domheap_pages(d, cpu,
kfraser@11973 119 extent_order, memflags)) == NULL) )
kaf24@8673 120 {
kaf24@8673 121 DPRINTK("Could not allocate order=%d extent: "
kfraser@10418 122 "id=%d memflags=%x (%ld of %d)\n",
kfraser@10418 123 extent_order, d->domain_id, memflags, i, nr_extents);
sos22@8688 124 goto out;
kaf24@8673 125 }
kaf24@8673 126
kaf24@8726 127 mfn = page_to_mfn(page);
kaf24@8673 128
kaf24@8694 129 if ( unlikely(shadow_mode_translate(d)) )
kaf24@8694 130 {
kaf24@8694 131 for ( j = 0; j < (1 << extent_order); j++ )
kaf24@8736 132 guest_physmap_add_page(d, gpfn + j, mfn + j);
sos22@8688 133 }
kaf24@8694 134 else
kaf24@8694 135 {
kaf24@8694 136 for ( j = 0; j < (1 << extent_order); j++ )
kaf24@8736 137 set_gpfn_from_mfn(mfn + j, gpfn + j);
kaf24@8673 138
sos22@8688 139 /* Inform the domain of the new page's machine address. */
kaf24@9068 140 if ( unlikely(__copy_to_guest_offset(extent_list, i, &mfn, 1)) )
sos22@8688 141 goto out;
sos22@8688 142 }
kaf24@8673 143 }
kaf24@8673 144
sos22@8688 145 out:
sos22@8688 146 return i;
kaf24@8673 147 }
cl349@9211 148
cl349@9211 149 int
cl349@9211 150 guest_remove_page(
cl349@9211 151 struct domain *d,
cl349@9211 152 unsigned long gmfn)
cl349@9211 153 {
cl349@9211 154 struct page_info *page;
cl349@9211 155 unsigned long mfn;
cl349@9211 156
cl349@9211 157 mfn = gmfn_to_mfn(d, gmfn);
cl349@9211 158 if ( unlikely(!mfn_valid(mfn)) )
cl349@9211 159 {
cl349@9211 160 DPRINTK("Domain %u page number %lx invalid\n",
tdeegan@11172 161 d->domain_id, gmfn);
cl349@9211 162 return 0;
cl349@9211 163 }
cl349@9211 164
cl349@9211 165 page = mfn_to_page(mfn);
cl349@9211 166 if ( unlikely(!get_page(page, d)) )
cl349@9211 167 {
cl349@9211 168 DPRINTK("Bad page free for domain %u\n", d->domain_id);
cl349@9211 169 return 0;
cl349@9211 170 }
cl349@9211 171
cl349@9211 172 if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
cl349@9211 173 put_page_and_type(page);
cl349@9211 174
cl349@9211 175 if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
cl349@9211 176 put_page(page);
cl349@9211 177
kfraser@10823 178 if ( unlikely(!page_is_removable(page)) )
kfraser@10584 179 {
kfraser@10584 180 /* We'll make this a guest-visible error in future, so take heed! */
kfraser@10584 181 DPRINTK("Dom%d freeing in-use page %lx (pseudophys %lx):"
kaf24@10744 182 " count=%lx type=%lx\n",
kfraser@10584 183 d->domain_id, mfn, get_gpfn_from_mfn(mfn),
kaf24@10744 184 (unsigned long)page->count_info, page->u.inuse.type_info);
kfraser@10584 185 }
kfraser@10584 186
kfraser@11212 187 guest_physmap_remove_page(d, gmfn, mfn);
cl349@9211 188
cl349@9211 189 put_page(page);
cl349@9211 190
cl349@9211 191 return 1;
cl349@9211 192 }
cl349@9211 193
kaf24@8673 194 static long
kaf24@6486 195 decrease_reservation(
kaf24@9068 196 struct domain *d,
kaf24@10314 197 XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
kaf24@6486 198 unsigned int nr_extents,
kaf24@6486 199 unsigned int extent_order,
kaf24@6607 200 int *preempted)
kaf24@6486 201 {
kaf24@10314 202 unsigned long i, j;
kaf24@10314 203 xen_pfn_t gmfn;
kaf24@6486 204
kaf24@9068 205 if ( !guest_handle_okay(extent_list, nr_extents) )
kaf24@6486 206 return 0;
kaf24@6486 207
kaf24@6486 208 for ( i = 0; i < nr_extents; i++ )
kaf24@6486 209 {
kaf24@6486 210 if ( hypercall_preempt_check() )
kaf24@6607 211 {
kaf24@6607 212 *preempted = 1;
kaf24@6486 213 return i;
kaf24@6607 214 }
kaf24@6486 215
kaf24@9068 216 if ( unlikely(__copy_from_guest_offset(&gmfn, extent_list, i, 1)) )
kaf24@6486 217 return i;
kaf24@6486 218
kaf24@6486 219 for ( j = 0; j < (1 << extent_order); j++ )
kaf24@6486 220 {
cl349@9211 221 if ( !guest_remove_page(d, gmfn + j) )
kaf24@6486 222 return i;
kaf24@6486 223 }
kaf24@6486 224 }
kaf24@6486 225
kaf24@6486 226 return nr_extents;
kaf24@6486 227 }
kaf24@6486 228
kaf24@8871 229 static long
kaf24@8871 230 translate_gpfn_list(
kaf24@9873 231 XEN_GUEST_HANDLE(xen_translate_gpfn_list_t) uop, unsigned long *progress)
kaf24@8871 232 {
kaf24@8871 233 struct xen_translate_gpfn_list op;
kaf24@10314 234 unsigned long i;
kaf24@10314 235 xen_pfn_t gpfn;
kaf24@10314 236 xen_pfn_t mfn;
kaf24@8871 237 struct domain *d;
kaf24@6486 238
kaf24@9068 239 if ( copy_from_guest(&op, uop, 1) )
kaf24@8871 240 return -EFAULT;
kaf24@8871 241
kaf24@8871 242 /* Is size too large for us to encode a continuation? */
kaf24@8871 243 if ( op.nr_gpfns > (ULONG_MAX >> START_EXTENT_SHIFT) )
kaf24@8871 244 return -EINVAL;
kaf24@8871 245
kaf24@9068 246 if ( !guest_handle_okay(op.gpfn_list, op.nr_gpfns) ||
kaf24@9068 247 !guest_handle_okay(op.mfn_list, op.nr_gpfns) )
kaf24@8871 248 return -EFAULT;
kaf24@8871 249
kaf24@8871 250 if ( op.domid == DOMID_SELF )
kaf24@8871 251 op.domid = current->domain->domain_id;
kaf24@8871 252 else if ( !IS_PRIV(current->domain) )
kaf24@8871 253 return -EPERM;
kaf24@8871 254
kaf24@8871 255 if ( (d = find_domain_by_id(op.domid)) == NULL )
kaf24@8871 256 return -ESRCH;
kaf24@8871 257
kfraser@11212 258 if ( !shadow_mode_translate(d) )
kaf24@8871 259 {
kaf24@8871 260 put_domain(d);
kaf24@8871 261 return -EINVAL;
kaf24@8871 262 }
kaf24@8871 263
kaf24@8871 264 for ( i = *progress; i < op.nr_gpfns; i++ )
kaf24@8871 265 {
kaf24@8871 266 if ( hypercall_preempt_check() )
kaf24@8871 267 {
kaf24@8871 268 put_domain(d);
kaf24@8871 269 *progress = i;
kaf24@8871 270 return -EAGAIN;
kaf24@8871 271 }
kaf24@8871 272
kaf24@9068 273 if ( unlikely(__copy_from_guest_offset(&gpfn, op.gpfn_list, i, 1)) )
kaf24@8871 274 {
kaf24@8871 275 put_domain(d);
kaf24@8871 276 return -EFAULT;
kaf24@8871 277 }
kaf24@8871 278
kaf24@8871 279 mfn = gmfn_to_mfn(d, gpfn);
kaf24@8871 280
kaf24@9068 281 if ( unlikely(__copy_to_guest_offset(op.mfn_list, i, &mfn, 1)) )
kaf24@8871 282 {
kaf24@8871 283 put_domain(d);
kaf24@8871 284 return -EFAULT;
kaf24@8871 285 }
kaf24@8871 286 }
kaf24@8871 287
kaf24@8871 288 put_domain(d);
kaf24@8871 289 return 0;
kaf24@8871 290 }
kaf24@8871 291
kfraser@10418 292 static long
kfraser@10418 293 memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
kfraser@10418 294 {
kfraser@10418 295 struct xen_memory_exchange exch;
kfraser@10418 296 LIST_HEAD(in_chunk_list);
kfraser@10418 297 LIST_HEAD(out_chunk_list);
kfraser@10418 298 unsigned long in_chunk_order, out_chunk_order;
kaf24@10459 299 xen_pfn_t gpfn, gmfn, mfn;
kfraser@10418 300 unsigned long i, j, k;
kfraser@11973 301 unsigned int memflags = 0, cpu;
kfraser@10418 302 long rc = 0;
kfraser@10418 303 struct domain *d;
kfraser@10418 304 struct page_info *page;
kfraser@10418 305
kfraser@10418 306 if ( copy_from_guest(&exch, arg, 1) )
kfraser@10418 307 return -EFAULT;
kfraser@10418 308
kfraser@10418 309 /* Various sanity checks. */
kfraser@10418 310 if ( (exch.nr_exchanged > exch.in.nr_extents) ||
kfraser@10418 311 /* Input and output domain identifiers match? */
kfraser@10418 312 (exch.in.domid != exch.out.domid) ||
kfraser@10418 313 /* Sizes of input and output lists do not overflow a long? */
kfraser@10418 314 ((~0UL >> exch.in.extent_order) < exch.in.nr_extents) ||
kfraser@10418 315 ((~0UL >> exch.out.extent_order) < exch.out.nr_extents) ||
kfraser@10418 316 /* Sizes of input and output lists match? */
kfraser@10418 317 ((exch.in.nr_extents << exch.in.extent_order) !=
kfraser@10418 318 (exch.out.nr_extents << exch.out.extent_order)) )
kfraser@10418 319 {
kfraser@10418 320 rc = -EINVAL;
kfraser@10418 321 goto fail_early;
kfraser@10418 322 }
kfraser@10418 323
kfraser@10418 324 /* Only privileged guests can allocate multi-page contiguous extents. */
kfraser@10418 325 if ( ((exch.in.extent_order != 0) || (exch.out.extent_order != 0)) &&
kfraser@10418 326 !multipage_allocation_permitted(current->domain) )
kfraser@10418 327 {
kfraser@10418 328 rc = -EPERM;
kfraser@10418 329 goto fail_early;
kfraser@10418 330 }
kfraser@10418 331
kfraser@10418 332 if ( (exch.out.address_bits != 0) &&
kfraser@10418 333 (exch.out.address_bits <
kfraser@10418 334 (get_order_from_pages(max_page) + PAGE_SHIFT)) )
kfraser@10418 335 {
kfraser@10418 336 if ( exch.out.address_bits < 31 )
kfraser@10418 337 {
kfraser@10418 338 rc = -ENOMEM;
kfraser@10418 339 goto fail_early;
kfraser@10418 340 }
kfraser@10418 341 memflags = MEMF_dma;
kfraser@10418 342 }
kfraser@10418 343
kfraser@10418 344 guest_handle_add_offset(exch.in.extent_start, exch.nr_exchanged);
kfraser@10418 345 exch.in.nr_extents -= exch.nr_exchanged;
kfraser@10418 346
kfraser@10418 347 if ( exch.in.extent_order <= exch.out.extent_order )
kfraser@10418 348 {
kfraser@10418 349 in_chunk_order = exch.out.extent_order - exch.in.extent_order;
kfraser@10418 350 out_chunk_order = 0;
kfraser@10418 351 guest_handle_add_offset(
kfraser@10418 352 exch.out.extent_start, exch.nr_exchanged >> in_chunk_order);
kfraser@10418 353 exch.out.nr_extents -= exch.nr_exchanged >> in_chunk_order;
kfraser@10418 354 }
kfraser@10418 355 else
kfraser@10418 356 {
kfraser@10418 357 in_chunk_order = 0;
kfraser@10418 358 out_chunk_order = exch.in.extent_order - exch.out.extent_order;
kfraser@10418 359 guest_handle_add_offset(
kfraser@10418 360 exch.out.extent_start, exch.nr_exchanged << out_chunk_order);
kfraser@10418 361 exch.out.nr_extents -= exch.nr_exchanged << out_chunk_order;
kfraser@10418 362 }
kfraser@10418 363
kfraser@10418 364 /*
kfraser@10418 365 * Only support exchange on calling domain right now. Otherwise there are
kfraser@10418 366 * tricky corner cases to consider (e.g., DOMF_dying domain).
kfraser@10418 367 */
kfraser@10418 368 if ( unlikely(exch.in.domid != DOMID_SELF) )
kfraser@10418 369 {
kfraser@10418 370 rc = IS_PRIV(current->domain) ? -EINVAL : -EPERM;
kfraser@10418 371 goto fail_early;
kfraser@10418 372 }
kfraser@10418 373 d = current->domain;
kfraser@10418 374
kfraser@11973 375 /* Use domain's first processor for locality parameter. */
kfraser@11973 376 cpu = d->vcpu[0]->processor;
kfraser@11973 377
kfraser@10418 378 for ( i = 0; i < (exch.in.nr_extents >> in_chunk_order); i++ )
kfraser@10418 379 {
kfraser@10418 380 if ( hypercall_preempt_check() )
kfraser@10418 381 {
kfraser@10418 382 exch.nr_exchanged += i << in_chunk_order;
kfraser@10418 383 if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
kfraser@10418 384 return -EFAULT;
kfraser@10418 385 return hypercall_create_continuation(
kfraser@10418 386 __HYPERVISOR_memory_op, "lh", XENMEM_exchange, arg);
kfraser@10418 387 }
kfraser@10418 388
kfraser@10418 389 /* Steal a chunk's worth of input pages from the domain. */
kfraser@10418 390 for ( j = 0; j < (1UL << in_chunk_order); j++ )
kfraser@10418 391 {
kfraser@10418 392 if ( unlikely(__copy_from_guest_offset(
kfraser@10418 393 &gmfn, exch.in.extent_start, (i<<in_chunk_order)+j, 1)) )
kfraser@10418 394 {
kfraser@10418 395 rc = -EFAULT;
kfraser@10418 396 goto fail;
kfraser@10418 397 }
kfraser@10418 398
kfraser@10418 399 for ( k = 0; k < (1UL << exch.in.extent_order); k++ )
kfraser@10418 400 {
kfraser@10418 401 mfn = gmfn_to_mfn(d, gmfn + k);
kfraser@10418 402 if ( unlikely(!mfn_valid(mfn)) )
kfraser@10418 403 {
kfraser@10418 404 rc = -EINVAL;
kfraser@10418 405 goto fail;
kfraser@10418 406 }
kfraser@10418 407
kfraser@10418 408 page = mfn_to_page(mfn);
kfraser@10418 409
kfraser@10418 410 if ( unlikely(steal_page(d, page, MEMF_no_refcount)) )
kfraser@10418 411 {
kfraser@10418 412 rc = -EINVAL;
kfraser@10418 413 goto fail;
kfraser@10418 414 }
kfraser@10418 415
kfraser@10418 416 list_add(&page->list, &in_chunk_list);
kfraser@10418 417 }
kfraser@10418 418 }
kfraser@10418 419
kfraser@10418 420 /* Allocate a chunk's worth of anonymous output pages. */
kfraser@10418 421 for ( j = 0; j < (1UL << out_chunk_order); j++ )
kfraser@10418 422 {
kfraser@11973 423 page = __alloc_domheap_pages(NULL, cpu,
kfraser@11973 424 exch.out.extent_order, memflags);
kfraser@10418 425 if ( unlikely(page == NULL) )
kfraser@10418 426 {
kfraser@10418 427 rc = -ENOMEM;
kfraser@10418 428 goto fail;
kfraser@10418 429 }
kfraser@10418 430
kfraser@10418 431 list_add(&page->list, &out_chunk_list);
kfraser@10418 432 }
kfraser@10418 433
kfraser@10418 434 /*
kfraser@10418 435 * Success! Beyond this point we cannot fail for this chunk.
kfraser@10418 436 */
kfraser@10418 437
kfraser@10418 438 /* Destroy final reference to each input page. */
kfraser@10418 439 while ( !list_empty(&in_chunk_list) )
kfraser@10418 440 {
kfraser@10418 441 page = list_entry(in_chunk_list.next, struct page_info, list);
kfraser@10418 442 list_del(&page->list);
kfraser@10418 443 if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
kfraser@10418 444 BUG();
kfraser@10418 445 mfn = page_to_mfn(page);
kfraser@10418 446 guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn);
kfraser@10418 447 put_page(page);
kfraser@10418 448 }
kfraser@10418 449
kfraser@10418 450 /* Assign each output page to the domain. */
kfraser@10418 451 j = 0;
kfraser@10418 452 while ( !list_empty(&out_chunk_list) )
kfraser@10418 453 {
kfraser@10418 454 page = list_entry(out_chunk_list.next, struct page_info, list);
kfraser@10418 455 list_del(&page->list);
kfraser@10418 456 if ( assign_pages(d, page, exch.out.extent_order,
kfraser@10418 457 MEMF_no_refcount) )
kfraser@10418 458 BUG();
kfraser@10418 459
kfraser@10418 460 /* Note that we ignore errors accessing the output extent list. */
kfraser@10418 461 (void)__copy_from_guest_offset(
kfraser@10418 462 &gpfn, exch.out.extent_start, (i<<out_chunk_order)+j, 1);
kfraser@10418 463
kfraser@10418 464 mfn = page_to_mfn(page);
kfraser@10418 465 if ( unlikely(shadow_mode_translate(d)) )
kfraser@10418 466 {
kfraser@10418 467 for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
kfraser@10418 468 guest_physmap_add_page(d, gpfn + k, mfn + k);
kfraser@10418 469 }
kfraser@10418 470 else
kfraser@10418 471 {
kfraser@10418 472 for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
kfraser@10418 473 set_gpfn_from_mfn(mfn + k, gpfn + k);
kfraser@10418 474 (void)__copy_to_guest_offset(
kfraser@10418 475 exch.out.extent_start, (i<<out_chunk_order)+j, &mfn, 1);
kfraser@10418 476 }
kfraser@10418 477
kfraser@10418 478 j++;
kfraser@10418 479 }
kfraser@10418 480 BUG_ON(j != (1UL << out_chunk_order));
kfraser@10418 481 }
kfraser@10418 482
kfraser@10418 483 exch.nr_exchanged += exch.in.nr_extents;
kfraser@10418 484 if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
kfraser@10418 485 rc = -EFAULT;
kfraser@10418 486 return rc;
kfraser@10418 487
kfraser@10418 488 /*
kfraser@10418 489 * Failed a chunk! Free any partial chunk work. Tell caller how many
kfraser@10418 490 * chunks succeeded.
kfraser@10418 491 */
kfraser@10418 492 fail:
kfraser@10418 493 /* Reassign any input pages we managed to steal. */
kfraser@10418 494 while ( !list_empty(&in_chunk_list) )
kfraser@10418 495 {
kfraser@10418 496 page = list_entry(in_chunk_list.next, struct page_info, list);
kfraser@10418 497 list_del(&page->list);
kfraser@10418 498 if ( assign_pages(d, page, 0, MEMF_no_refcount) )
kfraser@10418 499 BUG();
kfraser@10418 500 }
kfraser@10418 501
kfraser@10418 502 /* Free any output pages we managed to allocate. */
kfraser@10418 503 while ( !list_empty(&out_chunk_list) )
kfraser@10418 504 {
kfraser@10418 505 page = list_entry(out_chunk_list.next, struct page_info, list);
kfraser@10418 506 list_del(&page->list);
kfraser@10418 507 free_domheap_pages(page, exch.out.extent_order);
kfraser@10418 508 }
kfraser@10418 509
kfraser@10418 510 exch.nr_exchanged += i << in_chunk_order;
kfraser@10418 511
kfraser@10418 512 fail_early:
kfraser@10418 513 if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
kfraser@10418 514 rc = -EFAULT;
kfraser@10418 515 return rc;
kfraser@10418 516 }
kfraser@10418 517
kaf24@9873 518 long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg)
kaf24@6486 519 {
kaf24@6486 520 struct domain *d;
kfraser@10418 521 int rc, op, preempted = 0;
kfraser@10418 522 unsigned int memflags = 0;
kaf24@8871 523 unsigned long start_extent, progress;
kaf24@6486 524 struct xen_memory_reservation reservation;
kaf24@7959 525 domid_t domid;
kaf24@6486 526
kaf24@6486 527 op = cmd & ((1 << START_EXTENT_SHIFT) - 1);
kaf24@6486 528
kaf24@6486 529 switch ( op )
kaf24@6486 530 {
kaf24@6486 531 case XENMEM_increase_reservation:
kaf24@6486 532 case XENMEM_decrease_reservation:
kaf24@8673 533 case XENMEM_populate_physmap:
kfraser@10418 534 start_extent = cmd >> START_EXTENT_SHIFT;
kfraser@10418 535
kaf24@9068 536 if ( copy_from_guest(&reservation, arg, 1) )
kfraser@10418 537 return start_extent;
kaf24@6486 538
kaf24@8871 539 /* Is size too large for us to encode a continuation? */
kaf24@8871 540 if ( reservation.nr_extents > (ULONG_MAX >> START_EXTENT_SHIFT) )
kfraser@10418 541 return start_extent;
kaf24@8871 542
kaf24@6486 543 if ( unlikely(start_extent > reservation.nr_extents) )
kfraser@10418 544 return start_extent;
kaf24@9068 545
kaf24@9068 546 if ( !guest_handle_is_null(reservation.extent_start) )
kaf24@9068 547 guest_handle_add_offset(reservation.extent_start, start_extent);
kaf24@6486 548 reservation.nr_extents -= start_extent;
kaf24@6486 549
kaf24@6701 550 if ( (reservation.address_bits != 0) &&
kaf24@6702 551 (reservation.address_bits <
kaf24@6702 552 (get_order_from_pages(max_page) + PAGE_SHIFT)) )
kaf24@6486 553 {
kaf24@6486 554 if ( reservation.address_bits < 31 )
kfraser@10418 555 return start_extent;
kfraser@10418 556 memflags = MEMF_dma;
kaf24@6486 557 }
kaf24@6486 558
kaf24@6486 559 if ( likely(reservation.domid == DOMID_SELF) )
kaf24@6486 560 d = current->domain;
kfraser@10418 561 else if ( !IS_PRIV(current->domain) ||
kfraser@10418 562 ((d = find_domain_by_id(reservation.domid)) == NULL) )
kfraser@10418 563 return start_extent;
kaf24@6486 564
kaf24@8673 565 switch ( op )
kaf24@8673 566 {
kaf24@8673 567 case XENMEM_increase_reservation:
kaf24@8673 568 rc = increase_reservation(
kaf24@8673 569 d,
kaf24@8673 570 reservation.extent_start,
kaf24@8673 571 reservation.nr_extents,
kaf24@8673 572 reservation.extent_order,
kfraser@10418 573 memflags,
kaf24@8673 574 &preempted);
kaf24@8673 575 break;
kaf24@8673 576 case XENMEM_decrease_reservation:
kaf24@8673 577 rc = decrease_reservation(
kaf24@8673 578 d,
kaf24@8673 579 reservation.extent_start,
kaf24@8673 580 reservation.nr_extents,
kaf24@8673 581 reservation.extent_order,
kaf24@8673 582 &preempted);
kaf24@8673 583 break;
kaf24@8673 584 case XENMEM_populate_physmap:
kaf24@8673 585 default:
kaf24@8673 586 rc = populate_physmap(
kaf24@8673 587 d,
kaf24@8673 588 reservation.extent_start,
kaf24@8673 589 reservation.nr_extents,
kaf24@8673 590 reservation.extent_order,
kfraser@10418 591 memflags,
kaf24@8673 592 &preempted);
kaf24@8673 593 break;
kaf24@8673 594 }
kaf24@6486 595
kaf24@6486 596 if ( unlikely(reservation.domid != DOMID_SELF) )
kaf24@6486 597 put_domain(d);
kaf24@6486 598
kaf24@6486 599 rc += start_extent;
kaf24@6486 600
kaf24@6607 601 if ( preempted )
kaf24@9068 602 return hypercall_create_continuation(
kaf24@9068 603 __HYPERVISOR_memory_op, "lh",
kaf24@9068 604 op | (rc << START_EXTENT_SHIFT), arg);
kaf24@6607 605
kaf24@6486 606 break;
kaf24@6486 607
kfraser@10418 608 case XENMEM_exchange:
kfraser@10418 609 rc = memory_exchange(guest_handle_cast(arg, xen_memory_exchange_t));
kfraser@10418 610 break;
kfraser@10418 611
kaf24@6486 612 case XENMEM_maximum_ram_page:
kaf24@7959 613 rc = max_page;
kaf24@7959 614 break;
kaf24@7959 615
kaf24@7959 616 case XENMEM_current_reservation:
kaf24@7959 617 case XENMEM_maximum_reservation:
kaf24@9068 618 if ( copy_from_guest(&domid, arg, 1) )
kaf24@6486 619 return -EFAULT;
kaf24@7959 620
kaf24@9068 621 if ( likely(domid == DOMID_SELF) )
kaf24@7959 622 d = current->domain;
kaf24@7959 623 else if ( !IS_PRIV(current->domain) )
kaf24@7959 624 return -EPERM;
kaf24@7959 625 else if ( (d = find_domain_by_id(domid)) == NULL )
kaf24@7959 626 return -ESRCH;
kaf24@7959 627
kaf24@7959 628 rc = (op == XENMEM_current_reservation) ? d->tot_pages : d->max_pages;
kaf24@7959 629
kaf24@7959 630 if ( unlikely(domid != DOMID_SELF) )
kaf24@7959 631 put_domain(d);
kaf24@7959 632
kaf24@6486 633 break;
kaf24@6486 634
kaf24@8871 635 case XENMEM_translate_gpfn_list:
kaf24@8871 636 progress = cmd >> START_EXTENT_SHIFT;
kaf24@9068 637 rc = translate_gpfn_list(
kaf24@9068 638 guest_handle_cast(arg, xen_translate_gpfn_list_t),
kaf24@9068 639 &progress);
kaf24@8871 640 if ( rc == -EAGAIN )
kaf24@9068 641 return hypercall_create_continuation(
kaf24@9068 642 __HYPERVISOR_memory_op, "lh",
kaf24@9068 643 op | (progress << START_EXTENT_SHIFT), arg);
kaf24@8871 644 break;
kaf24@8871 645
kaf24@6486 646 default:
kaf24@8059 647 rc = arch_memory_op(op, arg);
kaf24@6486 648 break;
kaf24@6486 649 }
kaf24@6486 650
kaf24@6486 651 return rc;
kaf24@6486 652 }
kaf24@6486 653
kaf24@6486 654 /*
kaf24@6486 655 * Local variables:
kaf24@6486 656 * mode: C
kaf24@6486 657 * c-set-style: "BSD"
kaf24@6486 658 * c-basic-offset: 4
kaf24@6486 659 * tab-width: 4
kaf24@6486 660 * indent-tabs-mode: nil
kaf24@6486 661 * End:
kaf24@6486 662 */