ia64/xen-unstable: xen/common/memory.c @ 18594:5e4e234d58be

changeset: x86: Define __per_cpu_shift label to help kdump/crashdump.
           Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author:    Keir Fraser <keir.fraser@citrix.com>
date:      Wed Oct 08 13:11:06 2008 +0100
parents:   9a7b46546e05
children:  bd33ff263e2c
/******************************************************************************
 * memory.c
 *
 * Code to handle memory-related requests.
 *
 * Copyright (c) 2003-2004, B Dragovic
 * Copyright (c) 2003-2005, K A Fraser
 */

#include <xen/config.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/perfc.h>
#include <xen/sched.h>
#include <xen/event.h>
#include <xen/paging.h>
#include <xen/iocap.h>
#include <xen/guest_access.h>
#include <xen/hypercall.h>
#include <xen/errno.h>
#include <asm/current.h>
#include <asm/hardirq.h>
#include <xen/numa.h>
#include <public/memory.h>
#include <xsm/xsm.h>

struct memop_args {
    /* INPUT */
    struct domain *domain;     /* Domain to be affected. */
    XEN_GUEST_HANDLE(xen_pfn_t) extent_list; /* List of extent base addrs. */
    unsigned int nr_extents;   /* Number of extents to allocate or free. */
    unsigned int extent_order; /* Size of each extent. */
    unsigned int memflags;     /* Allocation flags. */

    /* INPUT/OUTPUT */
    unsigned int nr_done;    /* Number of extents processed so far. */
    int          preempted;  /* Was the hypercall preempted? */
};
static void increase_reservation(struct memop_args *a)
{
    struct page_info *page;
    unsigned long i;
    xen_pfn_t mfn;
    struct domain *d = a->domain;

    if ( !guest_handle_is_null(a->extent_list) &&
         !guest_handle_subrange_okay(a->extent_list, a->nr_done,
                                     a->nr_extents-1) )
        return;

    if ( (a->extent_order != 0) &&
         !multipage_allocation_permitted(current->domain) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        page = alloc_domheap_pages(d, a->extent_order, a->memflags);
        if ( unlikely(page == NULL) )
        {
            gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
                     "id=%d memflags=%x (%ld of %d)\n",
                     a->extent_order, d->domain_id, a->memflags,
                     i, a->nr_extents);
            goto out;
        }

        /* Inform the domain of the new page's machine address. */
        if ( !guest_handle_is_null(a->extent_list) )
        {
            mfn = page_to_mfn(page);
            if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
                goto out;
        }
    }

 out:
    a->nr_done = i;
}
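/*
 * Illustrative guest-side usage of the handler above (a sketch, not taken
 * from this file): a paravirtualised kernel asking Xen for 16 extra machine
 * frames via XENMEM_increase_reservation.  Field names follow
 * public/memory.h; set_xen_guest_handle() and HYPERVISOR_memory_op() are
 * assumed to be provided by the guest OS.
 *
 *     xen_pfn_t frame_list[16];
 *     struct xen_memory_reservation reservation = {
 *         .nr_extents   = 16,
 *         .extent_order = 0,          // order-0, i.e. single pages
 *         .mem_flags    = 0,
 *         .domid        = DOMID_SELF,
 *     };
 *     long rc;
 *
 *     set_xen_guest_handle(reservation.extent_start, frame_list);
 *     rc = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation);
 *     // rc is the number of extents allocated; frame_list[0..rc-1] now
 *     // holds the machine frame numbers written back by the loop above.
 */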
static void populate_physmap(struct memop_args *a)
{
    struct page_info *page;
    unsigned long i, j;
    xen_pfn_t gpfn, mfn;
    struct domain *d = a->domain;

    if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done,
                                     a->nr_extents-1) )
        return;

    if ( (a->extent_order != 0) &&
         !multipage_allocation_permitted(current->domain) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) )
            goto out;

        page = alloc_domheap_pages(d, a->extent_order, a->memflags);
        if ( unlikely(page == NULL) )
        {
            gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
                     "id=%d memflags=%x (%ld of %d)\n",
                     a->extent_order, d->domain_id, a->memflags,
                     i, a->nr_extents);
            goto out;
        }

        mfn = page_to_mfn(page);
        guest_physmap_add_page(d, gpfn, mfn, a->extent_order);

        if ( !paging_mode_translate(d) )
        {
            for ( j = 0; j < (1 << a->extent_order); j++ )
                set_gpfn_from_mfn(mfn + j, gpfn + j);

            /* Inform the domain of the new page's machine address. */
            if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
                goto out;
        }
    }

 out:
    a->nr_done = i;
}
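/*
 * Illustrative contrast with increase_reservation() above: for
 * XENMEM_populate_physmap the caller fills extent_list with the *guest*
 * frame numbers it wants populated.  The loop allocates backing memory,
 * wires it into the physmap at those gpfns, and, for non-translated guests
 * only, writes the chosen machine frame number back over the same slot.
 */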
int guest_remove_page(struct domain *d, unsigned long gmfn)
{
    struct page_info *page;
    unsigned long mfn;

    mfn = gmfn_to_mfn(d, gmfn);
    if ( unlikely(!mfn_valid(mfn)) )
    {
        gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n",
                 d->domain_id, gmfn);
        return 0;
    }

    page = mfn_to_page(mfn);
    if ( unlikely(!get_page(page, d)) )
    {
        gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);
        return 0;
    }

    if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
        put_page_and_type(page);

    if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
        put_page(page);

    guest_physmap_remove_page(d, gmfn, mfn, 0);

    put_page(page);

    return 1;
}
static void decrease_reservation(struct memop_args *a)
{
    unsigned long i, j;
    xen_pfn_t gmfn;

    if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done,
                                     a->nr_extents-1) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        if ( unlikely(__copy_from_guest_offset(&gmfn, a->extent_list, i, 1)) )
            goto out;

        for ( j = 0; j < (1 << a->extent_order); j++ )
            if ( !guest_remove_page(a->domain, gmfn + j) )
                goto out;
    }

 out:
    a->nr_done = i;
}
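/*
 * Illustrative guest-side usage (a sketch, not taken from this file): a
 * balloon driver handing frames back to Xen.  The hypercall wrapper and
 * handle helper are assumed to exist in the guest OS.
 *
 *     xen_pfn_t gmfn_list[16];    // frames the guest has already unmapped
 *     struct xen_memory_reservation reservation = {
 *         .nr_extents   = 16,
 *         .extent_order = 0,
 *         .domid        = DOMID_SELF,
 *     };
 *     long rc;
 *
 *     set_xen_guest_handle(reservation.extent_start, gmfn_list);
 *     rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
 *     // rc is the number of extents actually released back to Xen.
 */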
static long translate_gpfn_list(
    XEN_GUEST_HANDLE(xen_translate_gpfn_list_t) uop, unsigned long *progress)
{
    struct xen_translate_gpfn_list op;
    unsigned long i;
    xen_pfn_t gpfn;
    xen_pfn_t mfn;
    struct domain *d;
    int rc;

    if ( copy_from_guest(&op, uop, 1) )
        return -EFAULT;

    /* Is size too large for us to encode a continuation? */
    if ( op.nr_gpfns > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) )
        return -EINVAL;

    if ( !guest_handle_subrange_okay(op.gpfn_list, *progress, op.nr_gpfns-1) ||
         !guest_handle_subrange_okay(op.mfn_list, *progress, op.nr_gpfns-1) )
        return -EFAULT;

    rc = rcu_lock_target_domain_by_id(op.domid, &d);
    if ( rc )
        return rc;

    if ( !paging_mode_translate(d) )
    {
        rcu_unlock_domain(d);
        return -EINVAL;
    }

    for ( i = *progress; i < op.nr_gpfns; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            rcu_unlock_domain(d);
            *progress = i;
            return -EAGAIN;
        }

        if ( unlikely(__copy_from_guest_offset(&gpfn, op.gpfn_list, i, 1)) )
        {
            rcu_unlock_domain(d);
            return -EFAULT;
        }

        mfn = gmfn_to_mfn(d, gpfn);

        rc = xsm_translate_gpfn_list(current->domain, mfn);
        if ( rc )
        {
            rcu_unlock_domain(d);
            return rc;
        }

        if ( unlikely(__copy_to_guest_offset(op.mfn_list, i, &mfn, 1)) )
        {
            rcu_unlock_domain(d);
            return -EFAULT;
        }
    }

    rcu_unlock_domain(d);
    return 0;
}
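/*
 * Illustrative usage from a privileged domain (a sketch, not taken from
 * this file): translating an auto-translated guest's pseudo-physical frames
 * to machine frames.  Field names follow public/memory.h; 'domid', 'count',
 * 'gpfns' and 'mfns' are assumed to be caller-provided.
 *
 *     struct xen_translate_gpfn_list xlat = {
 *         .domid    = domid,
 *         .nr_gpfns = count,
 *     };
 *     long rc;
 *
 *     set_xen_guest_handle(xlat.gpfn_list, gpfns);
 *     set_xen_guest_handle(xlat.mfn_list,  mfns);
 *     rc = HYPERVISOR_memory_op(XENMEM_translate_gpfn_list, &xlat);
 */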
static long memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
{
    struct xen_memory_exchange exch;
    LIST_HEAD(in_chunk_list);
    LIST_HEAD(out_chunk_list);
    unsigned long in_chunk_order, out_chunk_order;
    xen_pfn_t     gpfn, gmfn, mfn;
    unsigned long i, j, k;
    unsigned int  node, memflags = 0;
    long          rc = 0;
    struct domain *d;
    struct page_info *page;

    if ( copy_from_guest(&exch, arg, 1) )
        return -EFAULT;

    /* Various sanity checks. */
    if ( (exch.nr_exchanged > exch.in.nr_extents) ||
         /* Input and output domain identifiers match? */
         (exch.in.domid != exch.out.domid) ||
         /* Sizes of input and output lists do not overflow a long? */
         ((~0UL >> exch.in.extent_order) < exch.in.nr_extents) ||
         ((~0UL >> exch.out.extent_order) < exch.out.nr_extents) ||
         /* Sizes of input and output lists match? */
         ((exch.in.nr_extents << exch.in.extent_order) !=
          (exch.out.nr_extents << exch.out.extent_order)) )
    {
        rc = -EINVAL;
        goto fail_early;
    }

    /* Only privileged guests can allocate multi-page contiguous extents. */
    if ( ((exch.in.extent_order != 0) || (exch.out.extent_order != 0)) &&
         !multipage_allocation_permitted(current->domain) )
    {
        rc = -EPERM;
        goto fail_early;
    }

    if ( exch.in.extent_order <= exch.out.extent_order )
    {
        in_chunk_order  = exch.out.extent_order - exch.in.extent_order;
        out_chunk_order = 0;
    }
    else
    {
        in_chunk_order  = 0;
        out_chunk_order = exch.in.extent_order - exch.out.extent_order;
    }

    /*
     * Only support exchange on calling domain right now. Otherwise there are
     * tricky corner cases to consider (e.g., dying domain).
     */
    if ( unlikely(exch.in.domid != DOMID_SELF) )
    {
        rc = IS_PRIV(current->domain) ? -EINVAL : -EPERM;
        goto fail_early;
    }
    d = current->domain;

    memflags |= MEMF_bits(domain_clamp_alloc_bitsize(
        d,
        XENMEMF_get_address_bits(exch.out.mem_flags) ? :
        (BITS_PER_LONG+PAGE_SHIFT)));
    node = XENMEMF_get_node(exch.out.mem_flags);
    if ( node == NUMA_NO_NODE )
        node = domain_to_node(d);
    memflags |= MEMF_node(node);

    for ( i = (exch.nr_exchanged >> in_chunk_order);
          i < (exch.in.nr_extents >> in_chunk_order);
          i++ )
    {
        if ( hypercall_preempt_check() )
        {
            exch.nr_exchanged = i << in_chunk_order;
            if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
                return -EFAULT;
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh", XENMEM_exchange, arg);
        }

        /* Steal a chunk's worth of input pages from the domain. */
        for ( j = 0; j < (1UL << in_chunk_order); j++ )
        {
            if ( unlikely(__copy_from_guest_offset(
                &gmfn, exch.in.extent_start, (i<<in_chunk_order)+j, 1)) )
            {
                rc = -EFAULT;
                goto fail;
            }

            for ( k = 0; k < (1UL << exch.in.extent_order); k++ )
            {
                mfn = gmfn_to_mfn(d, gmfn + k);
                if ( unlikely(!mfn_valid(mfn)) )
                {
                    rc = -EINVAL;
                    goto fail;
                }

                page = mfn_to_page(mfn);

                if ( unlikely(steal_page(d, page, MEMF_no_refcount)) )
                {
                    rc = -EINVAL;
                    goto fail;
                }

                list_add(&page->list, &in_chunk_list);
            }
        }

        /* Allocate a chunk's worth of anonymous output pages. */
        for ( j = 0; j < (1UL << out_chunk_order); j++ )
        {
            page = alloc_domheap_pages(NULL, exch.out.extent_order, memflags);
            if ( unlikely(page == NULL) )
            {
                rc = -ENOMEM;
                goto fail;
            }

            list_add(&page->list, &out_chunk_list);
        }

        /*
         * Success! Beyond this point we cannot fail for this chunk.
         */

        /* Destroy final reference to each input page. */
        while ( !list_empty(&in_chunk_list) )
        {
            page = list_entry(in_chunk_list.next, struct page_info, list);
            list_del(&page->list);
            if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
                BUG();
            mfn = page_to_mfn(page);
            guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn, 0);
            put_page(page);
        }

        /* Assign each output page to the domain. */
        j = 0;
        while ( !list_empty(&out_chunk_list) )
        {
            page = list_entry(out_chunk_list.next, struct page_info, list);
            list_del(&page->list);
            if ( assign_pages(d, page, exch.out.extent_order,
                              MEMF_no_refcount) )
                BUG();

            /* Note that we ignore errors accessing the output extent list. */
            (void)__copy_from_guest_offset(
                &gpfn, exch.out.extent_start, (i<<out_chunk_order)+j, 1);

            mfn = page_to_mfn(page);
            guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order);

            if ( !paging_mode_translate(d) )
            {
                for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
                    set_gpfn_from_mfn(mfn + k, gpfn + k);
                (void)__copy_to_guest_offset(
                    exch.out.extent_start, (i<<out_chunk_order)+j, &mfn, 1);
            }

            j++;
        }
        BUG_ON(j != (1UL << out_chunk_order));
    }

    exch.nr_exchanged = exch.in.nr_extents;
    if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
        rc = -EFAULT;
    return rc;

    /*
     * Failed a chunk! Free any partial chunk work. Tell caller how many
     * chunks succeeded.
     */
 fail:
    /* Reassign any input pages we managed to steal. */
    while ( !list_empty(&in_chunk_list) )
    {
        page = list_entry(in_chunk_list.next, struct page_info, list);
        list_del(&page->list);
        if ( assign_pages(d, page, 0, MEMF_no_refcount) )
            BUG();
    }

    /* Free any output pages we managed to allocate. */
    while ( !list_empty(&out_chunk_list) )
    {
        page = list_entry(out_chunk_list.next, struct page_info, list);
        list_del(&page->list);
        free_domheap_pages(page, exch.out.extent_order);
    }

    exch.nr_exchanged = i << in_chunk_order;

 fail_early:
    if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
        rc = -EFAULT;
    return rc;
}
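/*
 * Worked example of the chunk arithmetic above (illustrative): exchanging
 * 512 order-0 extents for a single order-9 extent gives in_chunk_order = 9
 * and out_chunk_order = 0, so each pass of the outer loop steals 2^9 = 512
 * input pages and allocates one order-9 output extent (2MB with 4kB pages)
 * covering the same amount of memory.  nr_exchanged only ever advances in
 * whole chunks, which is what makes the preemption/continuation path safe.
 */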
long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg)
{
    struct domain *d;
    int rc, op;
    unsigned int address_bits;
    unsigned long start_extent, progress;
    struct xen_memory_reservation reservation;
    struct memop_args args;
    domid_t domid;

    op = cmd & MEMOP_CMD_MASK;

    switch ( op )
    {
    case XENMEM_increase_reservation:
    case XENMEM_decrease_reservation:
    case XENMEM_populate_physmap:
        start_extent = cmd >> MEMOP_EXTENT_SHIFT;

        if ( copy_from_guest(&reservation, arg, 1) )
            return start_extent;

        /* Is size too large for us to encode a continuation? */
        if ( reservation.nr_extents > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) )
            return start_extent;

        if ( unlikely(start_extent > reservation.nr_extents) )
            return start_extent;

        args.extent_list  = reservation.extent_start;
        args.nr_extents   = reservation.nr_extents;
        args.extent_order = reservation.extent_order;
        args.nr_done      = start_extent;
        args.preempted    = 0;
        args.memflags     = 0;

        address_bits = XENMEMF_get_address_bits(reservation.mem_flags);
        if ( (address_bits != 0) &&
             (address_bits < (get_order_from_pages(max_page) + PAGE_SHIFT)) )
        {
            if ( address_bits <= PAGE_SHIFT )
                return start_extent;
            args.memflags = MEMF_bits(address_bits);
        }

        args.memflags |= MEMF_node(XENMEMF_get_node(reservation.mem_flags));

        if ( likely(reservation.domid == DOMID_SELF) )
        {
            d = rcu_lock_current_domain();
        }
        else
        {
            if ( (d = rcu_lock_domain_by_id(reservation.domid)) == NULL )
                return start_extent;
            if ( !IS_PRIV_FOR(current->domain, d) )
            {
                rcu_unlock_domain(d);
                return start_extent;
            }
        }
        args.domain = d;

        rc = xsm_memory_adjust_reservation(current->domain, d);
        if ( rc )
        {
            rcu_unlock_domain(d);
            return rc;
        }

        switch ( op )
        {
        case XENMEM_increase_reservation:
            increase_reservation(&args);
            break;
        case XENMEM_decrease_reservation:
            decrease_reservation(&args);
            break;
        default: /* XENMEM_populate_physmap */
            populate_physmap(&args);
            break;
        }

        rcu_unlock_domain(d);

        rc = args.nr_done;

        if ( args.preempted )
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh",
                op | (rc << MEMOP_EXTENT_SHIFT), arg);

        break;

    case XENMEM_exchange:
        rc = memory_exchange(guest_handle_cast(arg, xen_memory_exchange_t));
        break;

    case XENMEM_maximum_ram_page:
        rc = max_page;
        break;

    case XENMEM_current_reservation:
    case XENMEM_maximum_reservation:
    case XENMEM_maximum_gpfn:
        if ( copy_from_guest(&domid, arg, 1) )
            return -EFAULT;

        rc = rcu_lock_target_domain_by_id(domid, &d);
        if ( rc )
            return rc;

        rc = xsm_memory_stat_reservation(current->domain, d);
        if ( rc )
        {
            rcu_unlock_domain(d);
            return rc;
        }

        switch ( op )
        {
        case XENMEM_current_reservation:
            rc = d->tot_pages;
            break;
        case XENMEM_maximum_reservation:
            rc = d->max_pages;
            break;
        default:
            ASSERT(op == XENMEM_maximum_gpfn);
            rc = domain_get_maximum_gpfn(d);
            break;
        }

        rcu_unlock_domain(d);

        break;

    case XENMEM_translate_gpfn_list:
        progress = cmd >> MEMOP_EXTENT_SHIFT;
        rc = translate_gpfn_list(
            guest_handle_cast(arg, xen_translate_gpfn_list_t),
            &progress);
        if ( rc == -EAGAIN )
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh",
                op | (progress << MEMOP_EXTENT_SHIFT), arg);
        break;

    default:
        rc = arch_memory_op(op, arg);
        break;
    }

    return rc;
}
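/*
 * Illustrative note on the continuation encoding used above: the 'cmd'
 * argument carries the sub-operation in its low bits (MEMOP_CMD_MASK) and
 * the resume point in the bits above MEMOP_EXTENT_SHIFT, e.g.
 *
 *     cmd = XENMEM_populate_physmap | (start_extent << MEMOP_EXTENT_SHIFT);
 *
 * This is why nr_extents larger than ULONG_MAX >> MEMOP_EXTENT_SHIFT is
 * rejected early: such a resume point could not be re-encoded into 'cmd'
 * when a preempted hypercall is restarted.
 */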
/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */