ia64/xen-unstable

xen/common/memory.c @ 17062:0769835cf50f

x86 shadow: Reduce scope of shadow lock.

emulate_map_dest() does not need to hold the shadow lock: the only
shadow-related operation it may perform is removing a shadow, which is
infrequent and can take the lock internally. Everything else is either a
guest table walk or per-vcpu monitor table manipulation. (A generic sketch
of this locking pattern follows the changeset metadata below.)

Signed-off-by: Kevin Tian <kevin.tian@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Feb 14 10:33:12 2008 +0000 (2008-02-14)
parents cff4c8a1aa28
children 4e2e98c2098e
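
A minimal, self-contained sketch of the lock-scope reduction the commit message
describes, written against pthreads rather than Xen's shadow lock. All names
here (shadow_lock, nr_shadows, vcpu_ctx, map_dest, remove_shadow_locked) are
hypothetical; this is not the actual emulate_map_dest() code, only the pattern:
the frequent path touches per-vcpu state with no lock held, and only the rare
shadow-removal path acquires the lock, internally.

/*
 * Illustrative sketch only -- not Xen's shadow code. The common path works on
 * per-caller state and needs no lock; the rare "remove shadow" path touches
 * shared state and takes the lock inside the helper, not in the caller.
 */
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t shadow_lock = PTHREAD_MUTEX_INITIALIZER;
static int nr_shadows;                   /* shared state, lock-protected */

struct vcpu_ctx { int monitor_table; };  /* per-vcpu state, no lock needed */

static void remove_shadow_locked(void)
{
    /* Infrequent operation: acquire the lock only here. */
    pthread_mutex_lock(&shadow_lock);
    nr_shadows--;
    pthread_mutex_unlock(&shadow_lock);
}

/* Analogue of emulate_map_dest(): no lock held for the common work. */
static void map_dest(struct vcpu_ctx *v, bool stale_shadow_found)
{
    v->monitor_table++;                  /* per-vcpu manipulation: lock-free */

    if ( stale_shadow_found )            /* rare case */
        remove_shadow_locked();
}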

/******************************************************************************
 * memory.c
 *
 * Code to handle memory-related requests.
 *
 * Copyright (c) 2003-2004, B Dragovic
 * Copyright (c) 2003-2005, K A Fraser
 */

#include <xen/config.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/perfc.h>
#include <xen/sched.h>
#include <xen/event.h>
#include <xen/paging.h>
#include <xen/iocap.h>
#include <xen/guest_access.h>
#include <xen/hypercall.h>
#include <xen/errno.h>
#include <asm/current.h>
#include <asm/hardirq.h>
#include <public/memory.h>
#include <xsm/xsm.h>
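
/* Arguments marshalled for the increase/decrease/populate handlers below. */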
struct memop_args {
    /* INPUT */
    struct domain *domain;     /* Domain to be affected. */
    XEN_GUEST_HANDLE(xen_pfn_t) extent_list; /* List of extent base addrs. */
    unsigned int nr_extents;   /* Number of extents to allocate or free. */
    unsigned int extent_order; /* Size of each extent. */
    unsigned int memflags;     /* Allocation flags. */

    /* INPUT/OUTPUT */
    unsigned int nr_done;      /* Number of extents processed so far. */
    int          preempted;    /* Was the hypercall preempted? */
};
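
/* Pick the CPU of the domain's first vcpu as the allocation-local CPU. */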
static unsigned int select_local_cpu(struct domain *d)
{
    struct vcpu *v = d->vcpu[0];
    return (v ? v->processor : 0);
}
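
/*
 * Allocate a->nr_extents extents of order a->extent_order for the domain and,
 * if an extent list was supplied, report each new extent's base MFN back to
 * the guest.
 */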
static void increase_reservation(struct memop_args *a)
{
    struct page_info *page;
    unsigned long i;
    xen_pfn_t mfn;
    struct domain *d = a->domain;
    unsigned int cpu = select_local_cpu(d);

    if ( !guest_handle_is_null(a->extent_list) &&
         !guest_handle_okay(a->extent_list, a->nr_extents) )
        return;

    if ( (a->extent_order != 0) &&
         !multipage_allocation_permitted(current->domain) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        page = __alloc_domheap_pages(d, cpu, a->extent_order, a->memflags);
        if ( unlikely(page == NULL) )
        {
            gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
                     "id=%d memflags=%x (%ld of %d)\n",
                     a->extent_order, d->domain_id, a->memflags,
                     i, a->nr_extents);
            goto out;
        }

        /* Inform the domain of the new page's machine address. */
        if ( !guest_handle_is_null(a->extent_list) )
        {
            mfn = page_to_mfn(page);
            if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
                goto out;
        }
    }

 out:
    a->nr_done = i;
}
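
/*
 * Like increase_reservation(), but also wire each newly allocated extent into
 * the guest physmap at the GPFN the guest supplied in the extent list.
 */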
static void populate_physmap(struct memop_args *a)
{
    struct page_info *page;
    unsigned long i, j;
    xen_pfn_t gpfn, mfn;
    struct domain *d = a->domain;
    unsigned int cpu = select_local_cpu(d);

    if ( !guest_handle_okay(a->extent_list, a->nr_extents) )
        return;

    if ( (a->extent_order != 0) &&
         !multipage_allocation_permitted(current->domain) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) )
            goto out;

        page = __alloc_domheap_pages(d, cpu, a->extent_order, a->memflags);
        if ( unlikely(page == NULL) )
        {
            gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
                     "id=%d memflags=%x (%ld of %d)\n",
                     a->extent_order, d->domain_id, a->memflags,
                     i, a->nr_extents);
            goto out;
        }

        mfn = page_to_mfn(page);

        if ( unlikely(paging_mode_translate(d)) )
        {
            for ( j = 0; j < (1 << a->extent_order); j++ )
                if ( guest_physmap_add_page(d, gpfn + j, mfn + j) )
                    goto out;
        }
        else
        {
            for ( j = 0; j < (1 << a->extent_order); j++ )
                set_gpfn_from_mfn(mfn + j, gpfn + j);

            /* Inform the domain of the new page's machine address. */
            if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
                goto out;
        }
    }

 out:
    a->nr_done = i;
}
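
/*
 * Strip a single guest page of its pin/allocation references and remove it
 * from the guest physmap. Returns 1 on success, 0 if the GMFN was invalid.
 */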
int guest_remove_page(struct domain *d, unsigned long gmfn)
{
    struct page_info *page;
    unsigned long mfn;

    mfn = gmfn_to_mfn(d, gmfn);
    if ( unlikely(!mfn_valid(mfn)) )
    {
        gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n",
                 d->domain_id, gmfn);
        return 0;
    }

    page = mfn_to_page(mfn);
    if ( unlikely(!get_page(page, d)) )
    {
        gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);
        return 0;
    }

    if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
        put_page_and_type(page);

    if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
        put_page(page);

    guest_physmap_remove_page(d, gmfn, mfn);

    put_page(page);

    return 1;
}
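
/* Release the extents named in the guest's list via guest_remove_page(). */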
static void decrease_reservation(struct memop_args *a)
{
    unsigned long i, j;
    xen_pfn_t gmfn;

    if ( !guest_handle_okay(a->extent_list, a->nr_extents) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        if ( unlikely(__copy_from_guest_offset(&gmfn, a->extent_list, i, 1)) )
            goto out;

        for ( j = 0; j < (1 << a->extent_order); j++ )
            if ( !guest_remove_page(a->domain, gmfn + j) )
                goto out;
    }

 out:
    a->nr_done = i;
}
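
/*
 * Translate a list of GPFNs to MFNs for a translated-paging domain, writing
 * the results to the caller-supplied mfn_list. Returns -EAGAIN (with
 * *progress updated) when a continuation is needed.
 */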
static long translate_gpfn_list(
    XEN_GUEST_HANDLE(xen_translate_gpfn_list_t) uop, unsigned long *progress)
{
    struct xen_translate_gpfn_list op;
    unsigned long i;
    xen_pfn_t gpfn;
    xen_pfn_t mfn;
    struct domain *d;
    int rc;

    if ( copy_from_guest(&op, uop, 1) )
        return -EFAULT;

    /* Is size too large for us to encode a continuation? */
    if ( op.nr_gpfns > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) )
        return -EINVAL;

    if ( !guest_handle_okay(op.gpfn_list, op.nr_gpfns) ||
         !guest_handle_okay(op.mfn_list, op.nr_gpfns) )
        return -EFAULT;

    if ( op.domid == DOMID_SELF )
        d = current->domain;
    else {
        d = rcu_lock_domain_by_id(op.domid);
        if ( d == NULL )
            return -ESRCH;
        if ( !IS_PRIV_FOR(current->domain, d) ) {
            rcu_unlock_domain(d);
            return -EPERM;
        }
    }

    if ( !paging_mode_translate(d) )
    {
        rcu_unlock_domain(d);
        return -EINVAL;
    }

    for ( i = *progress; i < op.nr_gpfns; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            rcu_unlock_domain(d);
            *progress = i;
            return -EAGAIN;
        }

        if ( unlikely(__copy_from_guest_offset(&gpfn, op.gpfn_list, i, 1)) )
        {
            rcu_unlock_domain(d);
            return -EFAULT;
        }

        mfn = gmfn_to_mfn(d, gpfn);

        rc = xsm_translate_gpfn_list(current->domain, mfn);
        if ( rc )
        {
            rcu_unlock_domain(d);
            return rc;
        }

        if ( unlikely(__copy_to_guest_offset(op.mfn_list, i, &mfn, 1)) )
        {
            rcu_unlock_domain(d);
            return -EFAULT;
        }
    }

    rcu_unlock_domain(d);
    return 0;
}
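
/*
 * XENMEM_exchange: chunk by chunk, trade extents the domain already owns for
 * freshly allocated extents of a different order and/or address width.
 */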
static long memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
{
    struct xen_memory_exchange exch;
    LIST_HEAD(in_chunk_list);
    LIST_HEAD(out_chunk_list);
    unsigned long in_chunk_order, out_chunk_order;
    xen_pfn_t     gpfn, gmfn, mfn;
    unsigned long i, j, k;
    unsigned int  memflags = 0, cpu;
    long          rc = 0;
    struct domain *d;
    struct page_info *page;

    if ( copy_from_guest(&exch, arg, 1) )
        return -EFAULT;

    /* Various sanity checks. */
    if ( (exch.nr_exchanged > exch.in.nr_extents) ||
         /* Input and output domain identifiers match? */
         (exch.in.domid != exch.out.domid) ||
         /* Sizes of input and output lists do not overflow a long? */
         ((~0UL >> exch.in.extent_order) < exch.in.nr_extents) ||
         ((~0UL >> exch.out.extent_order) < exch.out.nr_extents) ||
         /* Sizes of input and output lists match? */
         ((exch.in.nr_extents << exch.in.extent_order) !=
          (exch.out.nr_extents << exch.out.extent_order)) )
    {
        rc = -EINVAL;
        goto fail_early;
    }

    /* Only privileged guests can allocate multi-page contiguous extents. */
    if ( ((exch.in.extent_order != 0) || (exch.out.extent_order != 0)) &&
         !multipage_allocation_permitted(current->domain) )
    {
        rc = -EPERM;
        goto fail_early;
    }

    if ( exch.in.extent_order <= exch.out.extent_order )
    {
        in_chunk_order  = exch.out.extent_order - exch.in.extent_order;
        out_chunk_order = 0;
    }
    else
    {
        in_chunk_order  = 0;
        out_chunk_order = exch.in.extent_order - exch.out.extent_order;
    }

    /*
     * Only support exchange on calling domain right now. Otherwise there are
     * tricky corner cases to consider (e.g., dying domain).
     */
    if ( unlikely(exch.in.domid != DOMID_SELF) )
    {
        rc = IS_PRIV(current->domain) ? -EINVAL : -EPERM;
        goto fail_early;
    }
    d = current->domain;

    memflags |= MEMF_bits(domain_clamp_alloc_bitsize(
        d, exch.out.address_bits ? : (BITS_PER_LONG+PAGE_SHIFT)));

    cpu = select_local_cpu(d);

    for ( i = (exch.nr_exchanged >> in_chunk_order);
          i < (exch.in.nr_extents >> in_chunk_order);
          i++ )
    {
        if ( hypercall_preempt_check() )
        {
            exch.nr_exchanged = i << in_chunk_order;
            if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
                return -EFAULT;
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh", XENMEM_exchange, arg);
        }

        /* Steal a chunk's worth of input pages from the domain. */
        for ( j = 0; j < (1UL << in_chunk_order); j++ )
        {
            if ( unlikely(__copy_from_guest_offset(
                &gmfn, exch.in.extent_start, (i<<in_chunk_order)+j, 1)) )
            {
                rc = -EFAULT;
                goto fail;
            }

            for ( k = 0; k < (1UL << exch.in.extent_order); k++ )
            {
                mfn = gmfn_to_mfn(d, gmfn + k);
                if ( unlikely(!mfn_valid(mfn)) )
                {
                    rc = -EINVAL;
                    goto fail;
                }

                page = mfn_to_page(mfn);

                if ( unlikely(steal_page(d, page, MEMF_no_refcount)) )
                {
                    rc = -EINVAL;
                    goto fail;
                }

                list_add(&page->list, &in_chunk_list);
            }
        }

        /* Allocate a chunk's worth of anonymous output pages. */
        for ( j = 0; j < (1UL << out_chunk_order); j++ )
        {
            page = __alloc_domheap_pages(
                NULL, cpu, exch.out.extent_order, memflags);
            if ( unlikely(page == NULL) )
            {
                rc = -ENOMEM;
                goto fail;
            }

            list_add(&page->list, &out_chunk_list);
        }

        /*
         * Success! Beyond this point we cannot fail for this chunk.
         */

        /* Destroy final reference to each input page. */
        while ( !list_empty(&in_chunk_list) )
        {
            page = list_entry(in_chunk_list.next, struct page_info, list);
            list_del(&page->list);
            if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
                BUG();
            mfn = page_to_mfn(page);
            guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn);
            put_page(page);
        }

        /* Assign each output page to the domain. */
        j = 0;
        while ( !list_empty(&out_chunk_list) )
        {
            page = list_entry(out_chunk_list.next, struct page_info, list);
            list_del(&page->list);
            if ( assign_pages(d, page, exch.out.extent_order,
                              MEMF_no_refcount) )
                BUG();

            /* Note that we ignore errors accessing the output extent list. */
            (void)__copy_from_guest_offset(
                &gpfn, exch.out.extent_start, (i<<out_chunk_order)+j, 1);

            mfn = page_to_mfn(page);
            if ( unlikely(paging_mode_translate(d)) )
            {
                /* Ignore failure here. There's nothing we can do. */
                for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
                    (void)guest_physmap_add_page(d, gpfn + k, mfn + k);
            }
            else
            {
                for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
                    set_gpfn_from_mfn(mfn + k, gpfn + k);
                (void)__copy_to_guest_offset(
                    exch.out.extent_start, (i<<out_chunk_order)+j, &mfn, 1);
            }

            j++;
        }
        BUG_ON(j != (1UL << out_chunk_order));
    }

    exch.nr_exchanged = exch.in.nr_extents;
    if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
        rc = -EFAULT;
    return rc;

    /*
     * Failed a chunk! Free any partial chunk work. Tell caller how many
     * chunks succeeded.
     */
 fail:
    /* Reassign any input pages we managed to steal. */
    while ( !list_empty(&in_chunk_list) )
    {
        page = list_entry(in_chunk_list.next, struct page_info, list);
        list_del(&page->list);
        if ( assign_pages(d, page, 0, MEMF_no_refcount) )
            BUG();
    }

    /* Free any output pages we managed to allocate. */
    while ( !list_empty(&out_chunk_list) )
    {
        page = list_entry(out_chunk_list.next, struct page_info, list);
        list_del(&page->list);
        free_domheap_pages(page, exch.out.extent_order);
    }

    exch.nr_exchanged = i << in_chunk_order;

 fail_early:
    if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
        rc = -EFAULT;
    return rc;
}
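
/*
 * Top-level XENMEM_* hypercall dispatcher. The upper bits of 'cmd' carry
 * continuation state (start extent / progress) for preemptible sub-ops.
 */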
long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg)
{
    struct domain *d;
    int rc, op;
    unsigned long start_extent, progress;
    struct xen_memory_reservation reservation;
    struct memop_args args;
    domid_t domid;

    op = cmd & MEMOP_CMD_MASK;

    switch ( op )
    {
    case XENMEM_increase_reservation:
    case XENMEM_decrease_reservation:
    case XENMEM_populate_physmap:
        start_extent = cmd >> MEMOP_EXTENT_SHIFT;

        if ( copy_from_guest(&reservation, arg, 1) )
            return start_extent;

        /* Is size too large for us to encode a continuation? */
        if ( reservation.nr_extents > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) )
            return start_extent;

        if ( unlikely(start_extent > reservation.nr_extents) )
            return start_extent;

        args.extent_list  = reservation.extent_start;
        args.nr_extents   = reservation.nr_extents;
        args.extent_order = reservation.extent_order;
        args.nr_done      = start_extent;
        args.preempted    = 0;
        args.memflags     = 0;

        if ( (reservation.address_bits != 0) &&
             (reservation.address_bits <
              (get_order_from_pages(max_page) + PAGE_SHIFT)) )
        {
            if ( reservation.address_bits <= PAGE_SHIFT )
                return start_extent;
            args.memflags = MEMF_bits(reservation.address_bits);
        }

        if ( likely(reservation.domid == DOMID_SELF) )
            d = current->domain;
        else {
            d = rcu_lock_domain_by_id(reservation.domid);
            if ( d == NULL )
                return start_extent;
            if ( !IS_PRIV_FOR(current->domain, d) ) {
                rcu_unlock_domain(d);
                return start_extent;
            }
        }
        args.domain = d;

        rc = xsm_memory_adjust_reservation(current->domain, d);
        if ( rc )
        {
            if ( reservation.domid != DOMID_SELF )
                rcu_unlock_domain(d);
            return rc;
        }

        switch ( op )
        {
        case XENMEM_increase_reservation:
            increase_reservation(&args);
            break;
        case XENMEM_decrease_reservation:
            decrease_reservation(&args);
            break;
        default: /* XENMEM_populate_physmap */
            populate_physmap(&args);
            break;
        }

        if ( unlikely(reservation.domid != DOMID_SELF) )
            rcu_unlock_domain(d);

        rc = args.nr_done;

        if ( args.preempted )
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh",
                op | (rc << MEMOP_EXTENT_SHIFT), arg);

        break;

    case XENMEM_exchange:
        rc = memory_exchange(guest_handle_cast(arg, xen_memory_exchange_t));
        break;

    case XENMEM_maximum_ram_page:
        rc = max_page;
        break;

    case XENMEM_current_reservation:
    case XENMEM_maximum_reservation:
    case XENMEM_maximum_gpfn:
        if ( copy_from_guest(&domid, arg, 1) )
            return -EFAULT;

        if ( likely(domid == DOMID_SELF) )
            d = current->domain;
        else {
            d = rcu_lock_domain_by_id(domid);
            if ( d == NULL )
                return -ESRCH;
            if ( !IS_PRIV_FOR(current->domain, d) ) {
                rcu_unlock_domain(d);
                return -EPERM;
            }
        }

        rc = xsm_memory_stat_reservation(current->domain, d);
        if ( rc )
        {
            if ( domid != DOMID_SELF )
                rcu_unlock_domain(d);
            return rc;
        }

        switch ( op )
        {
        case XENMEM_current_reservation:
            rc = d->tot_pages;
            break;
        case XENMEM_maximum_reservation:
            rc = d->max_pages;
            break;
        default:
            ASSERT(op == XENMEM_maximum_gpfn);
            rc = domain_get_maximum_gpfn(d);
            break;
        }

        if ( unlikely(domid != DOMID_SELF) )
            rcu_unlock_domain(d);

        break;

    case XENMEM_translate_gpfn_list:
        progress = cmd >> MEMOP_EXTENT_SHIFT;
        rc = translate_gpfn_list(
            guest_handle_cast(arg, xen_translate_gpfn_list_t),
            &progress);
        if ( rc == -EAGAIN )
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh",
                op | (progress << MEMOP_EXTENT_SHIFT), arg);
        break;

    default:
        rc = arch_memory_op(op, arg);
        break;
    }

    return rc;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */