xen/common/memory.c @ 15927:b7eb2bb9b625 (ia64/xen-unstable)

IRQ injection changes for HVM PCI passthru.
Signed-off-by: Allen Kay <allen.m.kay@intel.com>
Signed-off-by: Guy Zana <guy@neocleus.com>

author   kfraser@localhost.localdomain
date     Tue Sep 18 16:09:19 2007 +0100 (2007-09-18)
parents  96f64f4c42f0
children 2717128cbdd1
/******************************************************************************
 * memory.c
 *
 * Code to handle memory-related requests.
 *
 * Copyright (c) 2003-2004, B Dragovic
 * Copyright (c) 2003-2005, K A Fraser
 */

#include <xen/config.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/perfc.h>
#include <xen/sched.h>
#include <xen/event.h>
#include <xen/paging.h>
#include <xen/iocap.h>
#include <xen/guest_access.h>
#include <xen/hypercall.h>
#include <xen/errno.h>
#include <asm/current.h>
#include <asm/hardirq.h>
#include <public/memory.h>
#include <xsm/xsm.h>

struct memop_args {
    /* INPUT */
    struct domain *domain;     /* Domain to be affected. */
    XEN_GUEST_HANDLE(xen_pfn_t) extent_list; /* List of extent base addrs. */
    unsigned int nr_extents;   /* Number of extents to allocate or free. */
    unsigned int extent_order; /* Size of each extent. */
    unsigned int memflags;     /* Allocation flags. */

    /* INPUT/OUTPUT */
    unsigned int nr_done;    /* Number of extents processed so far. */
    int          preempted;  /* Was the hypercall preempted? */
};

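/*
 * Return the processor VCPU0 of @d currently runs on (or CPU 0 if no VCPU
 * exists yet), so the domheap allocations below can favour memory close to
 * that CPU.
 */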
static unsigned int select_local_cpu(struct domain *d)
{
    struct vcpu *v = d->vcpu[0];
    return (v ? v->processor : 0);
}

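/*
 * XENMEM_increase_reservation: allocate a->nr_extents extents of order
 * a->extent_order for the domain and, if an extent list was supplied,
 * report each new extent's starting MFN back to the guest.  Progress is
 * recorded in a->nr_done so a preempted call can be continued.
 */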
static void increase_reservation(struct memop_args *a)
{
    struct page_info *page;
    unsigned long i;
    xen_pfn_t mfn;
    struct domain *d = a->domain;
    unsigned int cpu = select_local_cpu(d);

    if ( !guest_handle_is_null(a->extent_list) &&
         !guest_handle_okay(a->extent_list, a->nr_extents) )
        return;

    if ( (a->extent_order != 0) &&
         !multipage_allocation_permitted(current->domain) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        page = __alloc_domheap_pages(d, cpu, a->extent_order, a->memflags);
        if ( unlikely(page == NULL) )
        {
            gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
                     "id=%d memflags=%x (%ld of %d)\n",
                     a->extent_order, d->domain_id, a->memflags,
                     i, a->nr_extents);
            goto out;
        }

        /* Inform the domain of the new page's machine address. */
        if ( !guest_handle_is_null(a->extent_list) )
        {
            mfn = page_to_mfn(page);
            if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
                goto out;
        }
    }

 out:
    a->nr_done = i;
}

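/*
 * XENMEM_populate_physmap: allocate pages and place them at the GPFNs the
 * guest supplies in a->extent_list.  For translated guests the P2M is
 * updated via guest_physmap_add_page(); otherwise the M2P table is set
 * and the new MFN is written back over the requested GPFN entry.
 */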
static void populate_physmap(struct memop_args *a)
{
    struct page_info *page;
    unsigned long i, j;
    xen_pfn_t gpfn, mfn;
    struct domain *d = a->domain;
    unsigned int cpu = select_local_cpu(d);

    if ( !guest_handle_okay(a->extent_list, a->nr_extents) )
        return;

    if ( (a->extent_order != 0) &&
         !multipage_allocation_permitted(current->domain) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) )
            goto out;

        page = __alloc_domheap_pages(d, cpu, a->extent_order, a->memflags);
        if ( unlikely(page == NULL) )
        {
            gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
                     "id=%d memflags=%x (%ld of %d)\n",
                     a->extent_order, d->domain_id, a->memflags,
                     i, a->nr_extents);
            goto out;
        }

        mfn = page_to_mfn(page);

        if ( unlikely(paging_mode_translate(d)) )
        {
            for ( j = 0; j < (1 << a->extent_order); j++ )
                guest_physmap_add_page(d, gpfn + j, mfn + j);
        }
        else
        {
            for ( j = 0; j < (1 << a->extent_order); j++ )
                set_gpfn_from_mfn(mfn + j, gpfn + j);

            /* Inform the domain of the new page's machine address. */
            if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
                goto out;
        }
    }

 out:
    a->nr_done = i;
}

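/*
 * Release a single page at guest frame @gmfn: drop any pin and allocation
 * references, remove the physmap entry, then drop the reference taken here.
 * Returns 1 on success, 0 if the frame is invalid or not owned by @d.
 */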
int guest_remove_page(struct domain *d, unsigned long gmfn)
{
    struct page_info *page;
    unsigned long mfn;

    mfn = gmfn_to_mfn(d, gmfn);
    if ( unlikely(!mfn_valid(mfn)) )
    {
        gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n",
                 d->domain_id, gmfn);
        return 0;
    }

    page = mfn_to_page(mfn);
    if ( unlikely(!get_page(page, d)) )
    {
        gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);
        return 0;
    }

    if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
        put_page_and_type(page);

    if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
        put_page(page);

    guest_physmap_remove_page(d, gmfn, mfn);

    put_page(page);

    return 1;
}

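/*
 * XENMEM_decrease_reservation: give back the extents listed by the guest,
 * one page at a time via guest_remove_page().
 */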
static void decrease_reservation(struct memop_args *a)
{
    unsigned long i, j;
    xen_pfn_t gmfn;

    if ( !guest_handle_okay(a->extent_list, a->nr_extents) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        if ( unlikely(__copy_from_guest_offset(&gmfn, a->extent_list, i, 1)) )
            goto out;

        for ( j = 0; j < (1 << a->extent_order); j++ )
            if ( !guest_remove_page(a->domain, gmfn + j) )
                goto out;
    }

 out:
    a->nr_done = i;
}

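/*
 * XENMEM_translate_gpfn_list: convert a guest-supplied list of GPFNs into
 * MFNs for an auto-translated domain.  *progress lets a preempted call
 * resume where it left off.
 */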
static long translate_gpfn_list(
    XEN_GUEST_HANDLE(xen_translate_gpfn_list_t) uop, unsigned long *progress)
{
    struct xen_translate_gpfn_list op;
    unsigned long i;
    xen_pfn_t gpfn;
    xen_pfn_t mfn;
    struct domain *d;
    int rc;

    if ( copy_from_guest(&op, uop, 1) )
        return -EFAULT;

    /* Is size too large for us to encode a continuation? */
    if ( op.nr_gpfns > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) )
        return -EINVAL;

    if ( !guest_handle_okay(op.gpfn_list, op.nr_gpfns) ||
         !guest_handle_okay(op.mfn_list, op.nr_gpfns) )
        return -EFAULT;

    if ( op.domid == DOMID_SELF )
        op.domid = current->domain->domain_id;
    else if ( !IS_PRIV(current->domain) )
        return -EPERM;

    if ( (d = rcu_lock_domain_by_id(op.domid)) == NULL )
        return -ESRCH;

    if ( !paging_mode_translate(d) )
    {
        rcu_unlock_domain(d);
        return -EINVAL;
    }

    for ( i = *progress; i < op.nr_gpfns; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            rcu_unlock_domain(d);
            *progress = i;
            return -EAGAIN;
        }

        if ( unlikely(__copy_from_guest_offset(&gpfn, op.gpfn_list, i, 1)) )
        {
            rcu_unlock_domain(d);
            return -EFAULT;
        }

        mfn = gmfn_to_mfn(d, gpfn);

        rc = xsm_translate_gpfn_list(current->domain, mfn);
        if ( rc )
        {
            rcu_unlock_domain(d);
            return rc;
        }

        if ( unlikely(__copy_to_guest_offset(op.mfn_list, i, &mfn, 1)) )
        {
            rcu_unlock_domain(d);
            return -EFAULT;
        }
    }

    rcu_unlock_domain(d);
    return 0;
}

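/*
 * XENMEM_exchange: trade the calling domain's existing extents for freshly
 * allocated ones of a (possibly) different order or address width.  Work
 * proceeds chunk by chunk; each chunk either completes fully or is unwound,
 * and exch.nr_exchanged tells the caller how far we got.
 */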
static long memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
{
    struct xen_memory_exchange exch;
    LIST_HEAD(in_chunk_list);
    LIST_HEAD(out_chunk_list);
    unsigned long in_chunk_order, out_chunk_order;
    xen_pfn_t     gpfn, gmfn, mfn;
    unsigned long i, j, k;
    unsigned int  memflags = 0, cpu;
    long          rc = 0;
    struct domain *d;
    struct page_info *page;

    if ( copy_from_guest(&exch, arg, 1) )
        return -EFAULT;

    /* Various sanity checks. */
    if ( (exch.nr_exchanged > exch.in.nr_extents) ||
         /* Input and output domain identifiers match? */
         (exch.in.domid != exch.out.domid) ||
         /* Sizes of input and output lists do not overflow a long? */
         ((~0UL >> exch.in.extent_order) < exch.in.nr_extents) ||
         ((~0UL >> exch.out.extent_order) < exch.out.nr_extents) ||
         /* Sizes of input and output lists match? */
         ((exch.in.nr_extents << exch.in.extent_order) !=
          (exch.out.nr_extents << exch.out.extent_order)) )
    {
        rc = -EINVAL;
        goto fail_early;
    }

    /* Only privileged guests can allocate multi-page contiguous extents. */
    if ( ((exch.in.extent_order != 0) || (exch.out.extent_order != 0)) &&
         !multipage_allocation_permitted(current->domain) )
    {
        rc = -EPERM;
        goto fail_early;
    }

    if ( (exch.out.address_bits != 0) &&
         (exch.out.address_bits <
          (get_order_from_pages(max_page) + PAGE_SHIFT)) )
    {
        if ( exch.out.address_bits <= PAGE_SHIFT )
        {
            rc = -ENOMEM;
            goto fail_early;
        }
        memflags = MEMF_bits(exch.out.address_bits);
    }

    if ( exch.in.extent_order <= exch.out.extent_order )
    {
        in_chunk_order  = exch.out.extent_order - exch.in.extent_order;
        out_chunk_order = 0;
    }
    else
    {
        in_chunk_order  = 0;
        out_chunk_order = exch.in.extent_order - exch.out.extent_order;
    }

    /*
     * Only support exchange on calling domain right now. Otherwise there are
     * tricky corner cases to consider (e.g., dying domain).
     */
    if ( unlikely(exch.in.domid != DOMID_SELF) )
    {
        rc = IS_PRIV(current->domain) ? -EINVAL : -EPERM;
        goto fail_early;
    }
    d = current->domain;

    cpu = select_local_cpu(d);

    for ( i = (exch.nr_exchanged >> in_chunk_order);
          i < (exch.in.nr_extents >> in_chunk_order);
          i++ )
    {
        if ( hypercall_preempt_check() )
        {
            exch.nr_exchanged = i << in_chunk_order;
            if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
                return -EFAULT;
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh", XENMEM_exchange, arg);
        }

        /* Steal a chunk's worth of input pages from the domain. */
        for ( j = 0; j < (1UL << in_chunk_order); j++ )
        {
            if ( unlikely(__copy_from_guest_offset(
                &gmfn, exch.in.extent_start, (i<<in_chunk_order)+j, 1)) )
            {
                rc = -EFAULT;
                goto fail;
            }

            for ( k = 0; k < (1UL << exch.in.extent_order); k++ )
            {
                mfn = gmfn_to_mfn(d, gmfn + k);
                if ( unlikely(!mfn_valid(mfn)) )
                {
                    rc = -EINVAL;
                    goto fail;
                }

                page = mfn_to_page(mfn);

                if ( unlikely(steal_page(d, page, MEMF_no_refcount)) )
                {
                    rc = -EINVAL;
                    goto fail;
                }

                list_add(&page->list, &in_chunk_list);
            }
        }

        /* Allocate a chunk's worth of anonymous output pages. */
        for ( j = 0; j < (1UL << out_chunk_order); j++ )
        {
            page = __alloc_domheap_pages(
                NULL, cpu, exch.out.extent_order, memflags);
            if ( unlikely(page == NULL) )
            {
                rc = -ENOMEM;
                goto fail;
            }

            list_add(&page->list, &out_chunk_list);
        }

        /*
         * Success! Beyond this point we cannot fail for this chunk.
         */

        /* Destroy final reference to each input page. */
        while ( !list_empty(&in_chunk_list) )
        {
            page = list_entry(in_chunk_list.next, struct page_info, list);
            list_del(&page->list);
            if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
                BUG();
            mfn = page_to_mfn(page);
            guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn);
            put_page(page);
        }

        /* Assign each output page to the domain. */
        j = 0;
        while ( !list_empty(&out_chunk_list) )
        {
            page = list_entry(out_chunk_list.next, struct page_info, list);
            list_del(&page->list);
            if ( assign_pages(d, page, exch.out.extent_order,
                              MEMF_no_refcount) )
                BUG();

            /* Note that we ignore errors accessing the output extent list. */
            (void)__copy_from_guest_offset(
                &gpfn, exch.out.extent_start, (i<<out_chunk_order)+j, 1);

            mfn = page_to_mfn(page);
            if ( unlikely(paging_mode_translate(d)) )
            {
                for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
                    guest_physmap_add_page(d, gpfn + k, mfn + k);
            }
            else
            {
                for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
                    set_gpfn_from_mfn(mfn + k, gpfn + k);
                (void)__copy_to_guest_offset(
                    exch.out.extent_start, (i<<out_chunk_order)+j, &mfn, 1);
            }

            j++;
        }
        BUG_ON(j != (1UL << out_chunk_order));
    }

    exch.nr_exchanged = exch.in.nr_extents;
    if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
        rc = -EFAULT;
    return rc;

    /*
     * Failed a chunk! Free any partial chunk work. Tell caller how many
     * chunks succeeded.
     */
 fail:
    /* Reassign any input pages we managed to steal. */
    while ( !list_empty(&in_chunk_list) )
    {
        page = list_entry(in_chunk_list.next, struct page_info, list);
        list_del(&page->list);
        if ( assign_pages(d, page, 0, MEMF_no_refcount) )
            BUG();
    }

    /* Free any output pages we managed to allocate. */
    while ( !list_empty(&out_chunk_list) )
    {
        page = list_entry(out_chunk_list.next, struct page_info, list);
        list_del(&page->list);
        free_domheap_pages(page, exch.out.extent_order);
    }

    exch.nr_exchanged = i << in_chunk_order;

 fail_early:
    if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
        rc = -EFAULT;
    return rc;
}

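/*
 * Top-level XENMEM_* hypercall dispatcher.  The low bits of @cmd select the
 * operation; the bits above MEMOP_EXTENT_SHIFT carry the restart point
 * encoded by hypercall continuations.
 */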
long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg)
{
    struct domain *d;
    int rc, op;
    unsigned long start_extent, progress;
    struct xen_memory_reservation reservation;
    struct memop_args args;
    domid_t domid;

    op = cmd & MEMOP_CMD_MASK;

    switch ( op )
    {
    case XENMEM_increase_reservation:
    case XENMEM_decrease_reservation:
    case XENMEM_populate_physmap:
        start_extent = cmd >> MEMOP_EXTENT_SHIFT;

        if ( copy_from_guest(&reservation, arg, 1) )
            return start_extent;

        /* Is size too large for us to encode a continuation? */
        if ( reservation.nr_extents > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) )
            return start_extent;

        if ( unlikely(start_extent > reservation.nr_extents) )
            return start_extent;

        args.extent_list  = reservation.extent_start;
        args.nr_extents   = reservation.nr_extents;
        args.extent_order = reservation.extent_order;
        args.nr_done      = start_extent;
        args.preempted    = 0;
        args.memflags     = 0;

        if ( (reservation.address_bits != 0) &&
             (reservation.address_bits <
              (get_order_from_pages(max_page) + PAGE_SHIFT)) )
        {
            if ( reservation.address_bits <= PAGE_SHIFT )
                return start_extent;
            args.memflags = MEMF_bits(reservation.address_bits);
        }

        if ( likely(reservation.domid == DOMID_SELF) )
            d = current->domain;
        else if ( !IS_PRIV(current->domain) ||
                  ((d = rcu_lock_domain_by_id(reservation.domid)) == NULL) )
            return start_extent;
        args.domain = d;

        rc = xsm_memory_adjust_reservation(current->domain, d);
        if ( rc )
        {
            if ( reservation.domid != DOMID_SELF )
                rcu_unlock_domain(d);
            return rc;
        }

        switch ( op )
        {
        case XENMEM_increase_reservation:
            increase_reservation(&args);
            break;
        case XENMEM_decrease_reservation:
            decrease_reservation(&args);
            break;
        default: /* XENMEM_populate_physmap */
            populate_physmap(&args);
            break;
        }

        if ( unlikely(reservation.domid != DOMID_SELF) )
            rcu_unlock_domain(d);

        rc = args.nr_done;

        if ( args.preempted )
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh",
                op | (rc << MEMOP_EXTENT_SHIFT), arg);

        break;

    case XENMEM_exchange:
        rc = memory_exchange(guest_handle_cast(arg, xen_memory_exchange_t));
        break;

    case XENMEM_maximum_ram_page:
        rc = max_page;
        break;

    case XENMEM_current_reservation:
    case XENMEM_maximum_reservation:
    case XENMEM_maximum_gpfn:
        if ( copy_from_guest(&domid, arg, 1) )
            return -EFAULT;

        if ( likely(domid == DOMID_SELF) )
            d = current->domain;
        else if ( !IS_PRIV(current->domain) )
            return -EPERM;
        else if ( (d = rcu_lock_domain_by_id(domid)) == NULL )
            return -ESRCH;

        rc = xsm_memory_stat_reservation(current->domain, d);
        if ( rc )
        {
            if ( domid != DOMID_SELF )
                rcu_unlock_domain(d);
            return rc;
        }

        switch ( op )
        {
        case XENMEM_current_reservation:
            rc = d->tot_pages;
            break;
        case XENMEM_maximum_reservation:
            rc = d->max_pages;
            break;
        default:
            ASSERT(op == XENMEM_maximum_gpfn);
            rc = domain_get_maximum_gpfn(d);
            break;
        }

        if ( unlikely(domid != DOMID_SELF) )
            rcu_unlock_domain(d);

        break;

    case XENMEM_translate_gpfn_list:
        progress = cmd >> MEMOP_EXTENT_SHIFT;
        rc = translate_gpfn_list(
            guest_handle_cast(arg, xen_translate_gpfn_list_t),
            &progress);
        if ( rc == -EAGAIN )
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh",
                op | (progress << MEMOP_EXTENT_SHIFT), arg);
        break;

    default:
        rc = arch_memory_op(op, arg);
        break;
    }

    return rc;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */