direct-io.hg

view xen/common/memory.c @ 10749:5fa2cd68d059

[IA64] Fix C/S 10529:4260eb8c08740de0000081c61a6237ffcb95b2d5 for IA64.
When a page is zapped from a domain, its reference count is checked.
But this raises a false-positive alert on Xen/IA64, because a page
'in use' there holds a reference count of 2:
- one reference taken when the page is assigned to the guest domain's
  pseudo-physical address space; this is dropped by guest_physmap_remove_page()
- one reference taken when the page is allocated to the domain;
  this is dropped by the following put_page()

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author kfraser@localhost.localdomain
date Thu Jul 27 13:17:17 2006 +0100 (2006-07-27)
parents d51a5ca0fa99
children 0f917d63e960
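
To illustrate the accounting described in the changeset description, the
fragment below annotates the tail of the zap path (see guest_remove_page()
in the source underneath) with which call drops which of the two Xen/IA64
references. It is a sketch restating the description above, not the literal
code.

    guest_physmap_remove_page(d, gmfn, mfn); /* drops the reference taken when
                                                the page was entered into the
                                                guest's pseudo-physical
                                                address space */
    put_page(page);                          /* drops the reference taken when
                                                the page was allocated to the
                                                domain */
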
line source
/******************************************************************************
 * memory.c
 *
 * Code to handle memory-related requests.
 *
 * Copyright (c) 2003-2004, B Dragovic
 * Copyright (c) 2003-2005, K A Fraser
 */

#include <xen/config.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/perfc.h>
#include <xen/sched.h>
#include <xen/event.h>
#include <xen/shadow.h>
#include <xen/iocap.h>
#include <xen/guest_access.h>
#include <asm/current.h>
#include <asm/hardirq.h>
#include <public/memory.h>

/*
 * To allow safe resume of do_memory_op() after preemption, we need to know
 * at what point in the page list to resume. For this purpose I steal the
 * high-order bits of the @cmd parameter, which are otherwise unused and zero.
 */
#define START_EXTENT_SHIFT 4 /* cmd[:4] == start_extent */

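/*
 * Allocate nr_extents extents of 2^extent_order pages to domain @d.  If
 * @extent_list is not NULL, the starting MFN of each extent is written
 * back to the guest.  Returns the number of extents completed.
 */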
static long
increase_reservation(
    struct domain *d,
    XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
    unsigned int nr_extents,
    unsigned int extent_order,
    unsigned int memflags,
    int *preempted)
{
    struct page_info *page;
    unsigned long i;
    xen_pfn_t mfn;

    if ( !guest_handle_is_null(extent_list) &&
         !guest_handle_okay(extent_list, nr_extents) )
        return 0;

    if ( (extent_order != 0) &&
         !multipage_allocation_permitted(current->domain) )
        return 0;

    for ( i = 0; i < nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            *preempted = 1;
            return i;
        }

        if ( unlikely((page = alloc_domheap_pages(
            d, extent_order, memflags)) == NULL) )
        {
            DPRINTK("Could not allocate order=%d extent: "
                    "id=%d memflags=%x (%ld of %d)\n",
                    extent_order, d->domain_id, memflags, i, nr_extents);
            return i;
        }

        /* Inform the domain of the new page's machine address. */
        if ( !guest_handle_is_null(extent_list) )
        {
            mfn = page_to_mfn(page);
            if ( unlikely(__copy_to_guest_offset(extent_list, i, &mfn, 1)) )
                return i;
        }
    }

    return nr_extents;
}

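/*
 * Allocate nr_extents extents of 2^extent_order pages for domain @d and
 * install each at the guest frame number read from @extent_list.  For
 * non-translated guests the starting MFN of each extent is written back
 * to the list.  Returns the number of extents completed.
 */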
static long
populate_physmap(
    struct domain *d,
    XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
    unsigned int nr_extents,
    unsigned int extent_order,
    unsigned int memflags,
    int *preempted)
{
    struct page_info *page;
    unsigned long i, j;
    xen_pfn_t gpfn;
    xen_pfn_t mfn;

    if ( !guest_handle_okay(extent_list, nr_extents) )
        return 0;

    if ( (extent_order != 0) &&
         !multipage_allocation_permitted(current->domain) )
        return 0;

    for ( i = 0; i < nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            *preempted = 1;
            goto out;
        }

        if ( unlikely(__copy_from_guest_offset(&gpfn, extent_list, i, 1)) )
            goto out;

        if ( unlikely((page = alloc_domheap_pages(
            d, extent_order, memflags)) == NULL) )
        {
            DPRINTK("Could not allocate order=%d extent: "
                    "id=%d memflags=%x (%ld of %d)\n",
                    extent_order, d->domain_id, memflags, i, nr_extents);
            goto out;
        }

        mfn = page_to_mfn(page);

        if ( unlikely(shadow_mode_translate(d)) )
        {
            for ( j = 0; j < (1 << extent_order); j++ )
                guest_physmap_add_page(d, gpfn + j, mfn + j);
        }
        else
        {
            for ( j = 0; j < (1 << extent_order); j++ )
                set_gpfn_from_mfn(mfn + j, gpfn + j);

            /* Inform the domain of the new page's machine address. */
            if ( unlikely(__copy_to_guest_offset(extent_list, i, &mfn, 1)) )
                goto out;
        }
    }

 out:
    return i;
}

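/*
 * Release the page at guest frame @gmfn from domain @d.  Returns 1 on
 * success, 0 if the frame is invalid or a reference on the page cannot
 * be taken.
 */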
int
guest_remove_page(
    struct domain *d,
    unsigned long gmfn)
{
    struct page_info *page;
    unsigned long mfn;

    mfn = gmfn_to_mfn(d, gmfn);
    if ( unlikely(!mfn_valid(mfn)) )
    {
        DPRINTK("Domain %u page number %lx invalid\n",
                d->domain_id, mfn);
        return 0;
    }

    page = mfn_to_page(mfn);
    if ( unlikely(!get_page(page, d)) )
    {
        DPRINTK("Bad page free for domain %u\n", d->domain_id);
        return 0;
    }

    if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
        put_page_and_type(page);

    if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
        put_page(page);

    if ( unlikely(!page_is_removable(page)) )
    {
        /* We'll make this a guest-visible error in future, so take heed! */
        DPRINTK("Dom%d freeing in-use page %lx (pseudophys %lx):"
                " count=%lx type=%lx\n",
                d->domain_id, mfn, get_gpfn_from_mfn(mfn),
                (unsigned long)page->count_info, page->u.inuse.type_info);
    }

    guest_physmap_remove_page(d, gmfn, mfn);

    put_page(page);

    return 1;
}

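/*
 * Release nr_extents extents of 2^extent_order pages, each starting at
 * a guest frame number read from @extent_list.  Returns the number of
 * extents completed.
 */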
static long
decrease_reservation(
    struct domain *d,
    XEN_GUEST_HANDLE(xen_pfn_t) extent_list,
    unsigned int nr_extents,
    unsigned int extent_order,
    int *preempted)
{
    unsigned long i, j;
    xen_pfn_t gmfn;

    if ( !guest_handle_okay(extent_list, nr_extents) )
        return 0;

    for ( i = 0; i < nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            *preempted = 1;
            return i;
        }

        if ( unlikely(__copy_from_guest_offset(&gmfn, extent_list, i, 1)) )
            return i;

        for ( j = 0; j < (1 << extent_order); j++ )
        {
            if ( !guest_remove_page(d, gmfn + j) )
                return i;
        }
    }

    return nr_extents;
}

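/*
 * Translate a guest-supplied list of guest frame numbers into machine
 * frame numbers.  Only valid for domains in shadow translate mode.
 */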
static long
translate_gpfn_list(
    XEN_GUEST_HANDLE(xen_translate_gpfn_list_t) uop, unsigned long *progress)
{
    struct xen_translate_gpfn_list op;
    unsigned long i;
    xen_pfn_t gpfn;
    xen_pfn_t mfn;
    struct domain *d;

    if ( copy_from_guest(&op, uop, 1) )
        return -EFAULT;

    /* Is size too large for us to encode a continuation? */
    if ( op.nr_gpfns > (ULONG_MAX >> START_EXTENT_SHIFT) )
        return -EINVAL;

    if ( !guest_handle_okay(op.gpfn_list, op.nr_gpfns) ||
         !guest_handle_okay(op.mfn_list, op.nr_gpfns) )
        return -EFAULT;

    if ( op.domid == DOMID_SELF )
        op.domid = current->domain->domain_id;
    else if ( !IS_PRIV(current->domain) )
        return -EPERM;

    if ( (d = find_domain_by_id(op.domid)) == NULL )
        return -ESRCH;

    if ( !shadow_mode_translate(d) )
    {
        put_domain(d);
        return -EINVAL;
    }

    for ( i = *progress; i < op.nr_gpfns; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            put_domain(d);
            *progress = i;
            return -EAGAIN;
        }

        if ( unlikely(__copy_from_guest_offset(&gpfn, op.gpfn_list, i, 1)) )
        {
            put_domain(d);
            return -EFAULT;
        }

        mfn = gmfn_to_mfn(d, gpfn);

        if ( unlikely(__copy_to_guest_offset(op.mfn_list, i, &mfn, 1)) )
        {
            put_domain(d);
            return -EFAULT;
        }
    }

    put_domain(d);
    return 0;
}

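/*
 * Exchange pages currently allocated to the calling domain for freshly
 * allocated extents of a (possibly) different extent order and/or
 * address width.
 */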
static long
memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
{
    struct xen_memory_exchange exch;
    LIST_HEAD(in_chunk_list);
    LIST_HEAD(out_chunk_list);
    unsigned long in_chunk_order, out_chunk_order;
    xen_pfn_t gpfn, gmfn, mfn;
    unsigned long i, j, k;
    unsigned int memflags = 0;
    long rc = 0;
    struct domain *d;
    struct page_info *page;

    if ( copy_from_guest(&exch, arg, 1) )
        return -EFAULT;

    /* Various sanity checks. */
    if ( (exch.nr_exchanged > exch.in.nr_extents) ||
         /* Input and output domain identifiers match? */
         (exch.in.domid != exch.out.domid) ||
         /* Sizes of input and output lists do not overflow a long? */
         ((~0UL >> exch.in.extent_order) < exch.in.nr_extents) ||
         ((~0UL >> exch.out.extent_order) < exch.out.nr_extents) ||
         /* Sizes of input and output lists match? */
         ((exch.in.nr_extents << exch.in.extent_order) !=
          (exch.out.nr_extents << exch.out.extent_order)) )
    {
        rc = -EINVAL;
        goto fail_early;
    }

    /* Only privileged guests can allocate multi-page contiguous extents. */
    if ( ((exch.in.extent_order != 0) || (exch.out.extent_order != 0)) &&
         !multipage_allocation_permitted(current->domain) )
    {
        rc = -EPERM;
        goto fail_early;
    }

    if ( (exch.out.address_bits != 0) &&
         (exch.out.address_bits <
          (get_order_from_pages(max_page) + PAGE_SHIFT)) )
    {
        if ( exch.out.address_bits < 31 )
        {
            rc = -ENOMEM;
            goto fail_early;
        }
        memflags = MEMF_dma;
    }

    guest_handle_add_offset(exch.in.extent_start, exch.nr_exchanged);
    exch.in.nr_extents -= exch.nr_exchanged;

    if ( exch.in.extent_order <= exch.out.extent_order )
    {
        in_chunk_order = exch.out.extent_order - exch.in.extent_order;
        out_chunk_order = 0;
        guest_handle_add_offset(
            exch.out.extent_start, exch.nr_exchanged >> in_chunk_order);
        exch.out.nr_extents -= exch.nr_exchanged >> in_chunk_order;
    }
    else
    {
        in_chunk_order = 0;
        out_chunk_order = exch.in.extent_order - exch.out.extent_order;
        guest_handle_add_offset(
            exch.out.extent_start, exch.nr_exchanged << out_chunk_order);
        exch.out.nr_extents -= exch.nr_exchanged << out_chunk_order;
    }

    /*
     * Only support exchange on calling domain right now. Otherwise there are
     * tricky corner cases to consider (e.g., DOMF_dying domain).
     */
    if ( unlikely(exch.in.domid != DOMID_SELF) )
    {
        rc = IS_PRIV(current->domain) ? -EINVAL : -EPERM;
        goto fail_early;
    }
    d = current->domain;

    for ( i = 0; i < (exch.in.nr_extents >> in_chunk_order); i++ )
    {
        if ( hypercall_preempt_check() )
        {
            exch.nr_exchanged += i << in_chunk_order;
            if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
                return -EFAULT;
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh", XENMEM_exchange, arg);
        }

        /* Steal a chunk's worth of input pages from the domain. */
        for ( j = 0; j < (1UL << in_chunk_order); j++ )
        {
            if ( unlikely(__copy_from_guest_offset(
                &gmfn, exch.in.extent_start, (i<<in_chunk_order)+j, 1)) )
            {
                rc = -EFAULT;
                goto fail;
            }

            for ( k = 0; k < (1UL << exch.in.extent_order); k++ )
            {
                mfn = gmfn_to_mfn(d, gmfn + k);
                if ( unlikely(!mfn_valid(mfn)) )
                {
                    rc = -EINVAL;
                    goto fail;
                }

                page = mfn_to_page(mfn);

                if ( unlikely(steal_page(d, page, MEMF_no_refcount)) )
                {
                    rc = -EINVAL;
                    goto fail;
                }

                list_add(&page->list, &in_chunk_list);
            }
        }

        /* Allocate a chunk's worth of anonymous output pages. */
        for ( j = 0; j < (1UL << out_chunk_order); j++ )
        {
            page = alloc_domheap_pages(
                NULL, exch.out.extent_order, memflags);
            if ( unlikely(page == NULL) )
            {
                rc = -ENOMEM;
                goto fail;
            }

            list_add(&page->list, &out_chunk_list);
        }

        /*
         * Success! Beyond this point we cannot fail for this chunk.
         */

        /* Destroy final reference to each input page. */
        while ( !list_empty(&in_chunk_list) )
        {
            page = list_entry(in_chunk_list.next, struct page_info, list);
            list_del(&page->list);
            if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
                BUG();
            mfn = page_to_mfn(page);
            guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn);
            put_page(page);
        }

        /* Assign each output page to the domain. */
        j = 0;
        while ( !list_empty(&out_chunk_list) )
        {
            page = list_entry(out_chunk_list.next, struct page_info, list);
            list_del(&page->list);
            if ( assign_pages(d, page, exch.out.extent_order,
                              MEMF_no_refcount) )
                BUG();

            /* Note that we ignore errors accessing the output extent list. */
            (void)__copy_from_guest_offset(
                &gpfn, exch.out.extent_start, (i<<out_chunk_order)+j, 1);

            mfn = page_to_mfn(page);
            if ( unlikely(shadow_mode_translate(d)) )
            {
                for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
                    guest_physmap_add_page(d, gpfn + k, mfn + k);
            }
            else
            {
                for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
                    set_gpfn_from_mfn(mfn + k, gpfn + k);
                (void)__copy_to_guest_offset(
                    exch.out.extent_start, (i<<out_chunk_order)+j, &mfn, 1);
            }

            j++;
        }
        BUG_ON(j != (1UL << out_chunk_order));
    }

    exch.nr_exchanged += exch.in.nr_extents;
    if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
        rc = -EFAULT;
    return rc;

    /*
     * Failed a chunk! Free any partial chunk work. Tell caller how many
     * chunks succeeded.
     */
 fail:
    /* Reassign any input pages we managed to steal. */
    while ( !list_empty(&in_chunk_list) )
    {
        page = list_entry(in_chunk_list.next, struct page_info, list);
        list_del(&page->list);
        if ( assign_pages(d, page, 0, MEMF_no_refcount) )
            BUG();
    }

    /* Free any output pages we managed to allocate. */
    while ( !list_empty(&out_chunk_list) )
    {
        page = list_entry(out_chunk_list.next, struct page_info, list);
        list_del(&page->list);
        free_domheap_pages(page, exch.out.extent_order);
    }

    exch.nr_exchanged += i << in_chunk_order;

 fail_early:
    if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
        rc = -EFAULT;
    return rc;
}

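/* Top-level handler for the XENMEM_* sub-operations of the memory_op hypercall. */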
long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg)
{
    struct domain *d;
    int rc, op, preempted = 0;
    unsigned int memflags = 0;
    unsigned long start_extent, progress;
    struct xen_memory_reservation reservation;
    domid_t domid;

    op = cmd & ((1 << START_EXTENT_SHIFT) - 1);

    switch ( op )
    {
    case XENMEM_increase_reservation:
    case XENMEM_decrease_reservation:
    case XENMEM_populate_physmap:
        start_extent = cmd >> START_EXTENT_SHIFT;

        if ( copy_from_guest(&reservation, arg, 1) )
            return start_extent;

        /* Is size too large for us to encode a continuation? */
        if ( reservation.nr_extents > (ULONG_MAX >> START_EXTENT_SHIFT) )
            return start_extent;

        if ( unlikely(start_extent > reservation.nr_extents) )
            return start_extent;

        if ( !guest_handle_is_null(reservation.extent_start) )
            guest_handle_add_offset(reservation.extent_start, start_extent);
        reservation.nr_extents -= start_extent;

        if ( (reservation.address_bits != 0) &&
             (reservation.address_bits <
              (get_order_from_pages(max_page) + PAGE_SHIFT)) )
        {
            if ( reservation.address_bits < 31 )
                return start_extent;
            memflags = MEMF_dma;
        }

        if ( likely(reservation.domid == DOMID_SELF) )
            d = current->domain;
        else if ( !IS_PRIV(current->domain) ||
                  ((d = find_domain_by_id(reservation.domid)) == NULL) )
            return start_extent;

        switch ( op )
        {
        case XENMEM_increase_reservation:
            rc = increase_reservation(
                d,
                reservation.extent_start,
                reservation.nr_extents,
                reservation.extent_order,
                memflags,
                &preempted);
            break;
        case XENMEM_decrease_reservation:
            rc = decrease_reservation(
                d,
                reservation.extent_start,
                reservation.nr_extents,
                reservation.extent_order,
                &preempted);
            break;
        case XENMEM_populate_physmap:
        default:
            rc = populate_physmap(
                d,
                reservation.extent_start,
                reservation.nr_extents,
                reservation.extent_order,
                memflags,
                &preempted);
            break;
        }

        if ( unlikely(reservation.domid != DOMID_SELF) )
            put_domain(d);

        rc += start_extent;

        if ( preempted )
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh",
                op | (rc << START_EXTENT_SHIFT), arg);

        break;

    case XENMEM_exchange:
        rc = memory_exchange(guest_handle_cast(arg, xen_memory_exchange_t));
        break;

    case XENMEM_maximum_ram_page:
        rc = max_page;
        break;

    case XENMEM_current_reservation:
    case XENMEM_maximum_reservation:
        if ( copy_from_guest(&domid, arg, 1) )
            return -EFAULT;

        if ( likely(domid == DOMID_SELF) )
            d = current->domain;
        else if ( !IS_PRIV(current->domain) )
            return -EPERM;
        else if ( (d = find_domain_by_id(domid)) == NULL )
            return -ESRCH;

        rc = (op == XENMEM_current_reservation) ? d->tot_pages : d->max_pages;

        if ( unlikely(domid != DOMID_SELF) )
            put_domain(d);

        break;

    case XENMEM_translate_gpfn_list:
        progress = cmd >> START_EXTENT_SHIFT;
        rc = translate_gpfn_list(
            guest_handle_cast(arg, xen_translate_gpfn_list_t),
            &progress);
        if ( rc == -EAGAIN )
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh",
                op | (progress << START_EXTENT_SHIFT), arg);
        break;

    default:
        rc = arch_memory_op(op, arg);
        break;
    }

    return rc;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */