ia64/xen-unstable

view xen/common/memory.c @ 10584:4260eb8c0874

kunmap_atomic() must zap the PTE to avoid dangling references
when attempting to free memory back to Xen. We can implement
something more efficient in future.

Also add a debug message if a guest tries to free 'in use'
memory. We'll make it a real guest-visible error in future.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@dhcp93.uk.xensource.com
date Wed Jun 28 18:17:41 2006 +0100 (2006-06-28)
parents 716e365377f5
children d51a5ca0fa99
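
Note: the kunmap_atomic() change itself lives in the guest (Linux) tree; the
file below only gains the debug message. A rough, hypothetical sketch of the
guest-side idea (helper names such as kmap_pte, __flush_tlb_one and
dec_preempt_count are assumed from i386 highmem code of the period, not taken
from this changeset):

/* Sketch only: clear the fixmap PTE on unmap so no stale mapping of the
 * page survives if the guest later returns it to Xen via
 * XENMEM_decrease_reservation. */
void kunmap_atomic(void *kvaddr, enum km_type type)
{
    unsigned long vaddr = (unsigned long)kvaddr & PAGE_MASK;
    enum fixed_addresses idx = type + KM_TYPE_NR * smp_processor_id();

    if ( vaddr == __fix_to_virt(FIX_KMAP_BEGIN + idx) )
    {
        /* Zap the PTE and flush, rather than leaving a dangling mapping. */
        pte_clear(&init_mm, vaddr, kmap_pte - idx);
        __flush_tlb_one(vaddr);
    }

    dec_preempt_count();
}

Without such a zap, the stale PTE would typically keep a reference on the
page, which is exactly what the new "freeing in-use page" message in
guest_remove_page() below flags.
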
line source
/******************************************************************************
 * memory.c
 *
 * Code to handle memory-related requests.
 *
 * Copyright (c) 2003-2004, B Dragovic
 * Copyright (c) 2003-2005, K A Fraser
 */

#include <xen/config.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/perfc.h>
#include <xen/sched.h>
#include <xen/event.h>
#include <xen/shadow.h>
#include <xen/iocap.h>
#include <xen/guest_access.h>
#include <asm/current.h>
#include <asm/hardirq.h>
#include <public/memory.h>

/*
 * To allow safe resume of do_memory_op() after preemption, we need to know
 * at what point in the page list to resume. For this purpose I steal the
 * high-order bits of the @cmd parameter, which are otherwise unused and zero.
 */
#define START_EXTENT_SHIFT 4 /* cmd[:4] == start_extent */
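
/*
 * Allocate nr_extents extents of 2^extent_order pages each for domain d
 * and, if extent_list is non-NULL, report each extent's starting MFN back
 * to the guest.  Returns the number of extents successfully allocated.
 */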
static long
increase_reservation(
    struct domain *d,
    XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
    unsigned int nr_extents,
    unsigned int extent_order,
    unsigned int memflags,
    int *preempted)
{
    struct page_info *page;
    unsigned long i;
    xen_pfn_t mfn;

    if ( !guest_handle_is_null(extent_list) &&
         !guest_handle_okay(extent_list, nr_extents) )
        return 0;

    if ( (extent_order != 0) &&
         !multipage_allocation_permitted(current->domain) )
        return 0;

    for ( i = 0; i < nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            *preempted = 1;
            return i;
        }

        if ( unlikely((page = alloc_domheap_pages(
            d, extent_order, memflags)) == NULL) )
        {
            DPRINTK("Could not allocate order=%d extent: "
                    "id=%d memflags=%x (%ld of %d)\n",
                    extent_order, d->domain_id, memflags, i, nr_extents);
            return i;
        }

        /* Inform the domain of the new page's machine address. */
        if ( !guest_handle_is_null(extent_list) )
        {
            mfn = page_to_mfn(page);
            if ( unlikely(__copy_to_guest_offset(extent_list, i, &mfn, 1)) )
                return i;
        }
    }

    return nr_extents;
}
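
/*
 * Like increase_reservation(), but also wires each new extent into the
 * guest physmap at the GPFNs supplied in extent_list (or records the M2P
 * mapping and reports the MFN back for non-translated guests).  Returns
 * the number of extents processed.
 */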
static long
populate_physmap(
    struct domain *d,
    XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
    unsigned int nr_extents,
    unsigned int extent_order,
    unsigned int memflags,
    int *preempted)
{
    struct page_info *page;
    unsigned long i, j;
    xen_pfn_t gpfn;
    xen_pfn_t mfn;

    if ( !guest_handle_okay(extent_list, nr_extents) )
        return 0;

    if ( (extent_order != 0) &&
         !multipage_allocation_permitted(current->domain) )
        return 0;

    for ( i = 0; i < nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            *preempted = 1;
            goto out;
        }

        if ( unlikely(__copy_from_guest_offset(&gpfn, extent_list, i, 1)) )
            goto out;

        if ( unlikely((page = alloc_domheap_pages(
            d, extent_order, memflags)) == NULL) )
        {
            DPRINTK("Could not allocate order=%d extent: "
                    "id=%d memflags=%x (%ld of %d)\n",
                    extent_order, d->domain_id, memflags, i, nr_extents);
            goto out;
        }

        mfn = page_to_mfn(page);

        if ( unlikely(shadow_mode_translate(d)) )
        {
            for ( j = 0; j < (1 << extent_order); j++ )
                guest_physmap_add_page(d, gpfn + j, mfn + j);
        }
        else
        {
            for ( j = 0; j < (1 << extent_order); j++ )
                set_gpfn_from_mfn(mfn + j, gpfn + j);

            /* Inform the domain of the new page's machine address. */
            if ( unlikely(__copy_to_guest_offset(extent_list, i, &mfn, 1)) )
                goto out;
        }
    }

 out:
    return i;
}
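
/*
 * Release a single guest page back to Xen: validate the GMFN, drop the
 * pin and allocation references, warn if other references remain, and
 * remove the page from the guest physmap.  Returns 1 on success, 0 on
 * failure.
 */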
int
guest_remove_page(
    struct domain *d,
    unsigned long gmfn)
{
    struct page_info *page;
    unsigned long mfn;

    mfn = gmfn_to_mfn(d, gmfn);
    if ( unlikely(!mfn_valid(mfn)) )
    {
        DPRINTK("Domain %u page number %lx invalid\n",
                d->domain_id, mfn);
        return 0;
    }

    page = mfn_to_page(mfn);
    if ( unlikely(!get_page(page, d)) )
    {
        DPRINTK("Bad page free for domain %u\n", d->domain_id);
        return 0;
    }

    if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
        put_page_and_type(page);

    if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
        put_page(page);

    if ( unlikely((page->count_info & PGC_count_mask) != 1) )
    {
        /* We'll make this a guest-visible error in future, so take heed! */
        DPRINTK("Dom%d freeing in-use page %lx (pseudophys %lx):"
                " count=%x type=%lx\n",
                d->domain_id, mfn, get_gpfn_from_mfn(mfn),
                page->count_info, page->u.inuse.type_info);
    }

    guest_physmap_remove_page(d, gmfn, mfn);

    put_page(page);

    return 1;
}
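
/*
 * Free nr_extents extents of 2^extent_order pages, named by GMFN in
 * extent_list, back to Xen via guest_remove_page().  Returns the number
 * of extents processed.
 */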
static long
decrease_reservation(
    struct domain *d,
    XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
    unsigned int nr_extents,
    unsigned int extent_order,
    int *preempted)
{
    unsigned long i, j;
    xen_pfn_t gmfn;

    if ( !guest_handle_okay(extent_list, nr_extents) )
        return 0;

    for ( i = 0; i < nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            *preempted = 1;
            return i;
        }

        if ( unlikely(__copy_from_guest_offset(&gmfn, extent_list, i, 1)) )
            return i;

        for ( j = 0; j < (1 << extent_order); j++ )
        {
            if ( !guest_remove_page(d, gmfn + j) )
                return i;
        }
    }

    return nr_extents;
}
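
/*
 * Translate a guest-supplied list of GPFNs into MFNs for a
 * translated-physmap (shadow translate) domain; rejected with -EINVAL for
 * non-translated domains.  Resumable via *progress when preempted.
 */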
static long
translate_gpfn_list(
    XEN_GUEST_HANDLE(xen_translate_gpfn_list_t) uop, unsigned long *progress)
{
    struct xen_translate_gpfn_list op;
    unsigned long i;
    xen_pfn_t gpfn;
    xen_pfn_t mfn;
    struct domain *d;

    if ( copy_from_guest(&op, uop, 1) )
        return -EFAULT;

    /* Is size too large for us to encode a continuation? */
    if ( op.nr_gpfns > (ULONG_MAX >> START_EXTENT_SHIFT) )
        return -EINVAL;

    if ( !guest_handle_okay(op.gpfn_list, op.nr_gpfns) ||
         !guest_handle_okay(op.mfn_list, op.nr_gpfns) )
        return -EFAULT;

    if ( op.domid == DOMID_SELF )
        op.domid = current->domain->domain_id;
    else if ( !IS_PRIV(current->domain) )
        return -EPERM;

    if ( (d = find_domain_by_id(op.domid)) == NULL )
        return -ESRCH;

    if ( !shadow_mode_translate(d) )
    {
        put_domain(d);
        return -EINVAL;
    }

    for ( i = *progress; i < op.nr_gpfns; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            put_domain(d);
            *progress = i;
            return -EAGAIN;
        }

        if ( unlikely(__copy_from_guest_offset(&gpfn, op.gpfn_list, i, 1)) )
        {
            put_domain(d);
            return -EFAULT;
        }

        mfn = gmfn_to_mfn(d, gpfn);

        if ( unlikely(__copy_to_guest_offset(op.mfn_list, i, &mfn, 1)) )
        {
            put_domain(d);
            return -EFAULT;
        }
    }

    put_domain(d);
    return 0;
}
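
/*
 * XENMEM_exchange: trade chunks of a domain's existing pages for freshly
 * allocated ones (e.g. to obtain machine-contiguous or address-limited
 * memory).  Works one chunk at a time so that a failed chunk can be
 * rolled back and progress reported via exch.nr_exchanged.
 */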
static long
memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
{
    struct xen_memory_exchange exch;
    LIST_HEAD(in_chunk_list);
    LIST_HEAD(out_chunk_list);
    unsigned long in_chunk_order, out_chunk_order;
    xen_pfn_t gpfn, gmfn, mfn;
    unsigned long i, j, k;
    unsigned int memflags = 0;
    long rc = 0;
    struct domain *d;
    struct page_info *page;

    if ( copy_from_guest(&exch, arg, 1) )
        return -EFAULT;

    /* Various sanity checks. */
    if ( (exch.nr_exchanged > exch.in.nr_extents) ||
         /* Input and output domain identifiers match? */
         (exch.in.domid != exch.out.domid) ||
         /* Sizes of input and output lists do not overflow a long? */
         ((~0UL >> exch.in.extent_order) < exch.in.nr_extents) ||
         ((~0UL >> exch.out.extent_order) < exch.out.nr_extents) ||
         /* Sizes of input and output lists match? */
         ((exch.in.nr_extents << exch.in.extent_order) !=
          (exch.out.nr_extents << exch.out.extent_order)) )
    {
        rc = -EINVAL;
        goto fail_early;
    }

    /* Only privileged guests can allocate multi-page contiguous extents. */
    if ( ((exch.in.extent_order != 0) || (exch.out.extent_order != 0)) &&
         !multipage_allocation_permitted(current->domain) )
    {
        rc = -EPERM;
        goto fail_early;
    }

    if ( (exch.out.address_bits != 0) &&
         (exch.out.address_bits <
          (get_order_from_pages(max_page) + PAGE_SHIFT)) )
    {
        if ( exch.out.address_bits < 31 )
        {
            rc = -ENOMEM;
            goto fail_early;
        }
        memflags = MEMF_dma;
    }

    guest_handle_add_offset(exch.in.extent_start, exch.nr_exchanged);
    exch.in.nr_extents -= exch.nr_exchanged;

    if ( exch.in.extent_order <= exch.out.extent_order )
    {
        in_chunk_order  = exch.out.extent_order - exch.in.extent_order;
        out_chunk_order = 0;
        guest_handle_add_offset(
            exch.out.extent_start, exch.nr_exchanged >> in_chunk_order);
        exch.out.nr_extents -= exch.nr_exchanged >> in_chunk_order;
    }
    else
    {
        in_chunk_order  = 0;
        out_chunk_order = exch.in.extent_order - exch.out.extent_order;
        guest_handle_add_offset(
            exch.out.extent_start, exch.nr_exchanged << out_chunk_order);
        exch.out.nr_extents -= exch.nr_exchanged << out_chunk_order;
    }

    /*
     * Only support exchange on calling domain right now. Otherwise there are
     * tricky corner cases to consider (e.g., DOMF_dying domain).
     */
    if ( unlikely(exch.in.domid != DOMID_SELF) )
    {
        rc = IS_PRIV(current->domain) ? -EINVAL : -EPERM;
        goto fail_early;
    }
    d = current->domain;

    for ( i = 0; i < (exch.in.nr_extents >> in_chunk_order); i++ )
    {
        if ( hypercall_preempt_check() )
        {
            exch.nr_exchanged += i << in_chunk_order;
            if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
                return -EFAULT;
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh", XENMEM_exchange, arg);
        }

        /* Steal a chunk's worth of input pages from the domain. */
        for ( j = 0; j < (1UL << in_chunk_order); j++ )
        {
            if ( unlikely(__copy_from_guest_offset(
                &gmfn, exch.in.extent_start, (i<<in_chunk_order)+j, 1)) )
            {
                rc = -EFAULT;
                goto fail;
            }

            for ( k = 0; k < (1UL << exch.in.extent_order); k++ )
            {
                mfn = gmfn_to_mfn(d, gmfn + k);
                if ( unlikely(!mfn_valid(mfn)) )
                {
                    rc = -EINVAL;
                    goto fail;
                }

                page = mfn_to_page(mfn);

                if ( unlikely(steal_page(d, page, MEMF_no_refcount)) )
                {
                    rc = -EINVAL;
                    goto fail;
                }

                list_add(&page->list, &in_chunk_list);
            }
        }

        /* Allocate a chunk's worth of anonymous output pages. */
        for ( j = 0; j < (1UL << out_chunk_order); j++ )
        {
            page = alloc_domheap_pages(
                NULL, exch.out.extent_order, memflags);
            if ( unlikely(page == NULL) )
            {
                rc = -ENOMEM;
                goto fail;
            }

            list_add(&page->list, &out_chunk_list);
        }

        /*
         * Success! Beyond this point we cannot fail for this chunk.
         */

        /* Destroy final reference to each input page. */
        while ( !list_empty(&in_chunk_list) )
        {
            page = list_entry(in_chunk_list.next, struct page_info, list);
            list_del(&page->list);
            if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
                BUG();
            mfn = page_to_mfn(page);
            guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn);
            put_page(page);
        }

        /* Assign each output page to the domain. */
        j = 0;
        while ( !list_empty(&out_chunk_list) )
        {
            page = list_entry(out_chunk_list.next, struct page_info, list);
            list_del(&page->list);
            if ( assign_pages(d, page, exch.out.extent_order,
                              MEMF_no_refcount) )
                BUG();

            /* Note that we ignore errors accessing the output extent list. */
            (void)__copy_from_guest_offset(
                &gpfn, exch.out.extent_start, (i<<out_chunk_order)+j, 1);

            mfn = page_to_mfn(page);
            if ( unlikely(shadow_mode_translate(d)) )
            {
                for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
                    guest_physmap_add_page(d, gpfn + k, mfn + k);
            }
            else
            {
                for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
                    set_gpfn_from_mfn(mfn + k, gpfn + k);
                (void)__copy_to_guest_offset(
                    exch.out.extent_start, (i<<out_chunk_order)+j, &mfn, 1);
            }

            j++;
        }
        BUG_ON(j != (1UL << out_chunk_order));
    }

    exch.nr_exchanged += exch.in.nr_extents;
    if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
        rc = -EFAULT;
    return rc;

    /*
     * Failed a chunk! Free any partial chunk work. Tell caller how many
     * chunks succeeded.
     */
 fail:
    /* Reassign any input pages we managed to steal. */
    while ( !list_empty(&in_chunk_list) )
    {
        page = list_entry(in_chunk_list.next, struct page_info, list);
        list_del(&page->list);
        if ( assign_pages(d, page, 0, MEMF_no_refcount) )
            BUG();
    }

    /* Free any output pages we managed to allocate. */
    while ( !list_empty(&out_chunk_list) )
    {
        page = list_entry(out_chunk_list.next, struct page_info, list);
        list_del(&page->list);
        free_domheap_pages(page, exch.out.extent_order);
    }

    exch.nr_exchanged += i << in_chunk_order;

 fail_early:
    if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
        rc = -EFAULT;
    return rc;
}
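
/*
 * Top-level XENMEM hypercall dispatcher.  The low START_EXTENT_SHIFT bits
 * of @cmd select the subcommand; the remaining bits carry the resume point
 * (start_extent / progress) when a continuation is created.
 */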
long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg)
{
    struct domain *d;
    int rc, op, preempted = 0;
    unsigned int memflags = 0;
    unsigned long start_extent, progress;
    struct xen_memory_reservation reservation;
    domid_t domid;

    op = cmd & ((1 << START_EXTENT_SHIFT) - 1);

    switch ( op )
    {
    case XENMEM_increase_reservation:
    case XENMEM_decrease_reservation:
    case XENMEM_populate_physmap:
        start_extent = cmd >> START_EXTENT_SHIFT;

        if ( copy_from_guest(&reservation, arg, 1) )
            return start_extent;

        /* Is size too large for us to encode a continuation? */
        if ( reservation.nr_extents > (ULONG_MAX >> START_EXTENT_SHIFT) )
            return start_extent;

        if ( unlikely(start_extent > reservation.nr_extents) )
            return start_extent;

        if ( !guest_handle_is_null(reservation.extent_start) )
            guest_handle_add_offset(reservation.extent_start, start_extent);
        reservation.nr_extents -= start_extent;

        if ( (reservation.address_bits != 0) &&
             (reservation.address_bits <
              (get_order_from_pages(max_page) + PAGE_SHIFT)) )
        {
            if ( reservation.address_bits < 31 )
                return start_extent;
            memflags = MEMF_dma;
        }

        if ( likely(reservation.domid == DOMID_SELF) )
            d = current->domain;
        else if ( !IS_PRIV(current->domain) ||
                  ((d = find_domain_by_id(reservation.domid)) == NULL) )
            return start_extent;

        switch ( op )
        {
        case XENMEM_increase_reservation:
            rc = increase_reservation(
                d,
                reservation.extent_start,
                reservation.nr_extents,
                reservation.extent_order,
                memflags,
                &preempted);
            break;
        case XENMEM_decrease_reservation:
            rc = decrease_reservation(
                d,
                reservation.extent_start,
                reservation.nr_extents,
                reservation.extent_order,
                &preempted);
            break;
        case XENMEM_populate_physmap:
        default:
            rc = populate_physmap(
                d,
                reservation.extent_start,
                reservation.nr_extents,
                reservation.extent_order,
                memflags,
                &preempted);
            break;
        }

        if ( unlikely(reservation.domid != DOMID_SELF) )
            put_domain(d);

        rc += start_extent;

        if ( preempted )
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh",
                op | (rc << START_EXTENT_SHIFT), arg);

        break;

    case XENMEM_exchange:
        rc = memory_exchange(guest_handle_cast(arg, xen_memory_exchange_t));
        break;

    case XENMEM_maximum_ram_page:
        rc = max_page;
        break;

    case XENMEM_current_reservation:
    case XENMEM_maximum_reservation:
        if ( copy_from_guest(&domid, arg, 1) )
            return -EFAULT;

        if ( likely(domid == DOMID_SELF) )
            d = current->domain;
        else if ( !IS_PRIV(current->domain) )
            return -EPERM;
        else if ( (d = find_domain_by_id(domid)) == NULL )
            return -ESRCH;

        rc = (op == XENMEM_current_reservation) ? d->tot_pages : d->max_pages;

        if ( unlikely(domid != DOMID_SELF) )
            put_domain(d);

        break;

    case XENMEM_translate_gpfn_list:
        progress = cmd >> START_EXTENT_SHIFT;
        rc = translate_gpfn_list(
            guest_handle_cast(arg, xen_translate_gpfn_list_t),
            &progress);
        if ( rc == -EAGAIN )
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh",
                op | (progress << START_EXTENT_SHIFT), arg);
        break;

    default:
        rc = arch_memory_op(op, arg);
        break;
    }

    return rc;
}
/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */