ia64/xen-unstable

view xen/common/memory.c @ 14103:ee4850bc895b

xen memory allocator: remove bit width restrictions

Hide the (default or user specified) DMA width from anything outside
the heap allocator. I/O-capable guests can now request any width for
the memory they want exchanged/added.
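
As a rough guest-side sketch of the kind of request this enables (the helper
name is hypothetical; set_xen_guest_handle() and HYPERVISOR_memory_op() are
the usual wrappers from the guest's public Xen headers, and the field names
follow this revision's public/memory.h), an I/O-capable guest might now ask
for, say, 32-bit-addressable replacement pages:

/* Hypothetical guest-side helper: trade nr existing pages for pages that
 * are machine-addressable with 32 bits. Error handling omitted. */
static int exchange_below_4gb(xen_pfn_t *in_mfns, xen_pfn_t *out_mfns,
                              unsigned int nr)
{
    struct xen_memory_exchange exch;

    memset(&exch, 0, sizeof(exch));

    exch.in.nr_extents    = nr;
    exch.in.extent_order  = 0;
    exch.in.domid         = DOMID_SELF;
    set_xen_guest_handle(exch.in.extent_start, in_mfns);

    exch.out.nr_extents   = nr;
    exch.out.extent_order = 0;
    exch.out.address_bits = 32;   /* width request; no longer clamped */
    exch.out.domid        = DOMID_SELF;
    set_xen_guest_handle(exch.out.extent_start, out_mfns);

    return HYPERVISOR_memory_op(XENMEM_exchange, &exch);
}

The hypervisor turns address_bits into MEMF_bits() in memory_exchange() and
do_memory_op() below; the allocator's DMA width is no longer visible at this
interface.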

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author kfraser@localhost.localdomain
date Fri Feb 23 17:02:58 2007 +0000 (2007-02-23)
parents 3fbe12560ffe
children 09a9b6d6c356
/******************************************************************************
 * memory.c
 *
 * Code to handle memory-related requests.
 *
 * Copyright (c) 2003-2004, B Dragovic
 * Copyright (c) 2003-2005, K A Fraser
 */

#include <xen/config.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/perfc.h>
#include <xen/sched.h>
#include <xen/event.h>
#include <xen/shadow.h>
#include <xen/iocap.h>
#include <xen/guest_access.h>
#include <xen/hypercall.h>
#include <xen/errno.h>
#include <asm/current.h>
#include <asm/hardirq.h>
#include <public/memory.h>

struct memop_args {
    /* INPUT */
    struct domain *domain;     /* Domain to be affected. */
    XEN_GUEST_HANDLE(xen_pfn_t) extent_list; /* List of extent base addrs. */
    unsigned int nr_extents;   /* Number of extents to allocate or free. */
    unsigned int extent_order; /* Size of each extent. */
    unsigned int memflags;     /* Allocation flags. */

    /* INPUT/OUTPUT */
    unsigned int nr_done;    /* Number of extents processed so far. */
    int          preempted;  /* Was the hypercall preempted? */
};

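/*
 * Allocation hint for __alloc_domheap_pages(): the CPU currently hosting
 * VCPU0, or CPU0 if the domain has no VCPUs yet.
 */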
static unsigned int select_local_cpu(struct domain *d)
{
    struct vcpu *v = d->vcpu[0];
    return (v ? v->processor : 0);
}

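/*
 * XENMEM_increase_reservation: allocate a->nr_extents further extents for
 * the domain and, if an extent list was supplied, report their MFNs back.
 */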
static void increase_reservation(struct memop_args *a)
{
    struct page_info *page;
    unsigned long i;
    xen_pfn_t mfn;
    struct domain *d = a->domain;
    unsigned int cpu = select_local_cpu(d);

    if ( !guest_handle_is_null(a->extent_list) &&
         !guest_handle_okay(a->extent_list, a->nr_extents) )
        return;

    if ( (a->extent_order != 0) &&
         !multipage_allocation_permitted(current->domain) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        page = __alloc_domheap_pages(d, cpu, a->extent_order, a->memflags);
        if ( unlikely(page == NULL) )
        {
            gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
                     "id=%d memflags=%x (%ld of %d)\n",
                     a->extent_order, d->domain_id, a->memflags,
                     i, a->nr_extents);
            goto out;
        }

        /* Inform the domain of the new page's machine address. */
        if ( !guest_handle_is_null(a->extent_list) )
        {
            mfn = page_to_mfn(page);
            if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
                goto out;
        }
    }

 out:
    a->nr_done = i;
}

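/*
 * XENMEM_populate_physmap: allocate new extents and install them at the
 * guest-supplied page-frame numbers in the domain's physmap.
 */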
static void populate_physmap(struct memop_args *a)
{
    struct page_info *page;
    unsigned long i, j;
    xen_pfn_t gpfn, mfn;
    struct domain *d = a->domain;
    unsigned int cpu = select_local_cpu(d);

    if ( !guest_handle_okay(a->extent_list, a->nr_extents) )
        return;

    if ( (a->extent_order != 0) &&
         !multipage_allocation_permitted(current->domain) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) )
            goto out;

        page = __alloc_domheap_pages(d, cpu, a->extent_order, a->memflags);
        if ( unlikely(page == NULL) )
        {
            gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
                     "id=%d memflags=%x (%ld of %d)\n",
                     a->extent_order, d->domain_id, a->memflags,
                     i, a->nr_extents);
            goto out;
        }

        mfn = page_to_mfn(page);

        if ( unlikely(shadow_mode_translate(d)) )
        {
            for ( j = 0; j < (1 << a->extent_order); j++ )
                guest_physmap_add_page(d, gpfn + j, mfn + j);
        }
        else
        {
            for ( j = 0; j < (1 << a->extent_order); j++ )
                set_gpfn_from_mfn(mfn + j, gpfn + j);

            /* Inform the domain of the new page's machine address. */
            if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
                goto out;
        }
    }

 out:
    a->nr_done = i;
}

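/*
 * Release a single guest page: drop its type and allocation references and
 * remove it from the domain's physmap. Returns 1 on success, 0 on failure.
 */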
int guest_remove_page(struct domain *d, unsigned long gmfn)
{
    struct page_info *page;
    unsigned long mfn;

    mfn = gmfn_to_mfn(d, gmfn);
    if ( unlikely(!mfn_valid(mfn)) )
    {
        gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n",
                 d->domain_id, gmfn);
        return 0;
    }

    page = mfn_to_page(mfn);
    if ( unlikely(!get_page(page, d)) )
    {
        gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);
        return 0;
    }

    if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
        put_page_and_type(page);

    if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
        put_page(page);

    if ( unlikely(!page_is_removable(page)) )
    {
        shadow_drop_references(d, page);
        /* We'll make this a guest-visible error in future, so take heed! */
        if ( !page_is_removable(page) )
            gdprintk(XENLOG_INFO, "Dom%d freeing in-use page %lx "
                     "(pseudophys %lx): count=%lx type=%lx\n",
                     d->domain_id, mfn, get_gpfn_from_mfn(mfn),
                     (unsigned long)page->count_info, page->u.inuse.type_info);
    }

    guest_physmap_remove_page(d, gmfn, mfn);

    put_page(page);

    return 1;
}

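/*
 * XENMEM_decrease_reservation: release the extents whose base GMFNs the
 * guest supplied in a->extent_list.
 */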
static void decrease_reservation(struct memop_args *a)
{
    unsigned long i, j;
    xen_pfn_t gmfn;

    if ( !guest_handle_okay(a->extent_list, a->nr_extents) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        if ( unlikely(__copy_from_guest_offset(&gmfn, a->extent_list, i, 1)) )
            goto out;

        for ( j = 0; j < (1 << a->extent_order); j++ )
            if ( !guest_remove_page(a->domain, gmfn + j) )
                goto out;
    }

 out:
    a->nr_done = i;
}

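/*
 * XENMEM_translate_gpfn_list: convert a list of guest page-frame numbers to
 * machine frame numbers for a translated-mode domain.
 */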
static long translate_gpfn_list(
    XEN_GUEST_HANDLE(xen_translate_gpfn_list_t) uop, unsigned long *progress)
{
    struct xen_translate_gpfn_list op;
    unsigned long i;
    xen_pfn_t gpfn;
    xen_pfn_t mfn;
    struct domain *d;

    if ( copy_from_guest(&op, uop, 1) )
        return -EFAULT;

    /* Is size too large for us to encode a continuation? */
    if ( op.nr_gpfns > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) )
        return -EINVAL;

    if ( !guest_handle_okay(op.gpfn_list, op.nr_gpfns) ||
         !guest_handle_okay(op.mfn_list, op.nr_gpfns) )
        return -EFAULT;

    if ( op.domid == DOMID_SELF )
        op.domid = current->domain->domain_id;
    else if ( !IS_PRIV(current->domain) )
        return -EPERM;

    if ( (d = get_domain_by_id(op.domid)) == NULL )
        return -ESRCH;

    if ( !shadow_mode_translate(d) )
    {
        put_domain(d);
        return -EINVAL;
    }

    for ( i = *progress; i < op.nr_gpfns; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            put_domain(d);
            *progress = i;
            return -EAGAIN;
        }

        if ( unlikely(__copy_from_guest_offset(&gpfn, op.gpfn_list, i, 1)) )
        {
            put_domain(d);
            return -EFAULT;
        }

        mfn = gmfn_to_mfn(d, gpfn);

        if ( unlikely(__copy_to_guest_offset(op.mfn_list, i, &mfn, 1)) )
        {
            put_domain(d);
            return -EFAULT;
        }
    }

    put_domain(d);
    return 0;
}

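/*
 * XENMEM_exchange: trade a set of the calling domain's pages, chunk by
 * chunk, for freshly allocated replacements (e.g. pages below a requested
 * address width).
 */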
static long memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
{
    struct xen_memory_exchange exch;
    LIST_HEAD(in_chunk_list);
    LIST_HEAD(out_chunk_list);
    unsigned long in_chunk_order, out_chunk_order;
    xen_pfn_t     gpfn, gmfn, mfn;
    unsigned long i, j, k;
    unsigned int  memflags = 0, cpu;
    long          rc = 0;
    struct domain *d;
    struct page_info *page;

    if ( copy_from_guest(&exch, arg, 1) )
        return -EFAULT;

    /* Various sanity checks. */
    if ( (exch.nr_exchanged > exch.in.nr_extents) ||
         /* Input and output domain identifiers match? */
         (exch.in.domid != exch.out.domid) ||
         /* Sizes of input and output lists do not overflow a long? */
         ((~0UL >> exch.in.extent_order) < exch.in.nr_extents) ||
         ((~0UL >> exch.out.extent_order) < exch.out.nr_extents) ||
         /* Sizes of input and output lists match? */
         ((exch.in.nr_extents << exch.in.extent_order) !=
          (exch.out.nr_extents << exch.out.extent_order)) )
    {
        rc = -EINVAL;
        goto fail_early;
    }

    /* Only privileged guests can allocate multi-page contiguous extents. */
    if ( ((exch.in.extent_order != 0) || (exch.out.extent_order != 0)) &&
         !multipage_allocation_permitted(current->domain) )
    {
        rc = -EPERM;
        goto fail_early;
    }

    if ( (exch.out.address_bits != 0) &&
         (exch.out.address_bits <
          (get_order_from_pages(max_page) + PAGE_SHIFT)) )
    {
        if ( exch.out.address_bits <= PAGE_SHIFT )
        {
            rc = -ENOMEM;
            goto fail_early;
        }
        memflags = MEMF_bits(exch.out.address_bits);
    }

    if ( exch.in.extent_order <= exch.out.extent_order )
    {
        in_chunk_order  = exch.out.extent_order - exch.in.extent_order;
        out_chunk_order = 0;
    }
    else
    {
        in_chunk_order  = 0;
        out_chunk_order = exch.in.extent_order - exch.out.extent_order;
    }

    /*
     * Only support exchange on calling domain right now. Otherwise there are
     * tricky corner cases to consider (e.g., DOMF_dying domain).
     */
    if ( unlikely(exch.in.domid != DOMID_SELF) )
    {
        rc = IS_PRIV(current->domain) ? -EINVAL : -EPERM;
        goto fail_early;
    }
    d = current->domain;

    cpu = select_local_cpu(d);

    for ( i = (exch.nr_exchanged >> in_chunk_order);
          i < (exch.in.nr_extents >> in_chunk_order);
          i++ )
    {
        if ( hypercall_preempt_check() )
        {
            exch.nr_exchanged = i << in_chunk_order;
            if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
                return -EFAULT;
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh", XENMEM_exchange, arg);
        }

        /* Steal a chunk's worth of input pages from the domain. */
        for ( j = 0; j < (1UL << in_chunk_order); j++ )
        {
            if ( unlikely(__copy_from_guest_offset(
                &gmfn, exch.in.extent_start, (i<<in_chunk_order)+j, 1)) )
            {
                rc = -EFAULT;
                goto fail;
            }

            for ( k = 0; k < (1UL << exch.in.extent_order); k++ )
            {
                mfn = gmfn_to_mfn(d, gmfn + k);
                if ( unlikely(!mfn_valid(mfn)) )
                {
                    rc = -EINVAL;
                    goto fail;
                }

                page = mfn_to_page(mfn);

                if ( unlikely(steal_page(d, page, MEMF_no_refcount)) )
                {
                    rc = -EINVAL;
                    goto fail;
                }

                list_add(&page->list, &in_chunk_list);
            }
        }

        /* Allocate a chunk's worth of anonymous output pages. */
        for ( j = 0; j < (1UL << out_chunk_order); j++ )
        {
            page = __alloc_domheap_pages(
                NULL, cpu, exch.out.extent_order, memflags);
            if ( unlikely(page == NULL) )
            {
                rc = -ENOMEM;
                goto fail;
            }

            list_add(&page->list, &out_chunk_list);
        }

        /*
         * Success! Beyond this point we cannot fail for this chunk.
         */

        /* Destroy final reference to each input page. */
        while ( !list_empty(&in_chunk_list) )
        {
            page = list_entry(in_chunk_list.next, struct page_info, list);
            list_del(&page->list);
            if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
                BUG();
            mfn = page_to_mfn(page);
            guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn);
            put_page(page);
        }

        /* Assign each output page to the domain. */
        j = 0;
        while ( !list_empty(&out_chunk_list) )
        {
            page = list_entry(out_chunk_list.next, struct page_info, list);
            list_del(&page->list);
            if ( assign_pages(d, page, exch.out.extent_order,
                              MEMF_no_refcount) )
                BUG();

            /* Note that we ignore errors accessing the output extent list. */
            (void)__copy_from_guest_offset(
                &gpfn, exch.out.extent_start, (i<<out_chunk_order)+j, 1);

            mfn = page_to_mfn(page);
            if ( unlikely(shadow_mode_translate(d)) )
            {
                for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
                    guest_physmap_add_page(d, gpfn + k, mfn + k);
            }
            else
            {
                for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
                    set_gpfn_from_mfn(mfn + k, gpfn + k);
                (void)__copy_to_guest_offset(
                    exch.out.extent_start, (i<<out_chunk_order)+j, &mfn, 1);
            }

            j++;
        }
        BUG_ON(j != (1UL << out_chunk_order));
    }

    exch.nr_exchanged = exch.in.nr_extents;
    if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
        rc = -EFAULT;
    return rc;

    /*
     * Failed a chunk! Free any partial chunk work. Tell caller how many
     * chunks succeeded.
     */
 fail:
    /* Reassign any input pages we managed to steal. */
    while ( !list_empty(&in_chunk_list) )
    {
        page = list_entry(in_chunk_list.next, struct page_info, list);
        list_del(&page->list);
        if ( assign_pages(d, page, 0, MEMF_no_refcount) )
            BUG();
    }

    /* Free any output pages we managed to allocate. */
    while ( !list_empty(&out_chunk_list) )
    {
        page = list_entry(out_chunk_list.next, struct page_info, list);
        list_del(&page->list);
        free_domheap_pages(page, exch.out.extent_order);
    }

    exch.nr_exchanged = i << in_chunk_order;

 fail_early:
    if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
        rc = -EFAULT;
    return rc;
}

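/*
 * memory_op hypercall entry point. The low MEMOP_CMD_MASK bits of 'cmd'
 * select the operation; the remaining bits carry the continuation offset
 * (start extent / progress) for preempted sub-operations.
 */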
long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg)
{
    struct domain *d;
    int rc, op;
    unsigned long start_extent, progress;
    struct xen_memory_reservation reservation;
    struct memop_args args;
    domid_t domid;

    op = cmd & MEMOP_CMD_MASK;

    switch ( op )
    {
    case XENMEM_increase_reservation:
    case XENMEM_decrease_reservation:
    case XENMEM_populate_physmap:
        start_extent = cmd >> MEMOP_EXTENT_SHIFT;

        if ( copy_from_guest(&reservation, arg, 1) )
            return start_extent;

        /* Is size too large for us to encode a continuation? */
        if ( reservation.nr_extents > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) )
            return start_extent;

        if ( unlikely(start_extent > reservation.nr_extents) )
            return start_extent;

        args.extent_list  = reservation.extent_start;
        args.nr_extents   = reservation.nr_extents;
        args.extent_order = reservation.extent_order;
        args.nr_done      = start_extent;
        args.preempted    = 0;
        args.memflags     = 0;

        if ( (reservation.address_bits != 0) &&
             (reservation.address_bits <
              (get_order_from_pages(max_page) + PAGE_SHIFT)) )
        {
            if ( reservation.address_bits <= PAGE_SHIFT )
                return start_extent;
            args.memflags = MEMF_bits(reservation.address_bits);
        }

        if ( likely(reservation.domid == DOMID_SELF) )
            d = current->domain;
        else if ( !IS_PRIV(current->domain) ||
                  ((d = get_domain_by_id(reservation.domid)) == NULL) )
            return start_extent;
        args.domain = d;

        switch ( op )
        {
        case XENMEM_increase_reservation:
            increase_reservation(&args);
            break;
        case XENMEM_decrease_reservation:
            decrease_reservation(&args);
            break;
        default: /* XENMEM_populate_physmap */
            populate_physmap(&args);
            break;
        }

        if ( unlikely(reservation.domid != DOMID_SELF) )
            put_domain(d);

        rc = args.nr_done;

        if ( args.preempted )
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh",
                op | (rc << MEMOP_EXTENT_SHIFT), arg);

        break;

    case XENMEM_exchange:
        rc = memory_exchange(guest_handle_cast(arg, xen_memory_exchange_t));
        break;

    case XENMEM_maximum_ram_page:
        rc = max_page;
        break;

    case XENMEM_current_reservation:
    case XENMEM_maximum_reservation:
        if ( copy_from_guest(&domid, arg, 1) )
            return -EFAULT;

        if ( likely(domid == DOMID_SELF) )
            d = current->domain;
        else if ( !IS_PRIV(current->domain) )
            return -EPERM;
        else if ( (d = get_domain_by_id(domid)) == NULL )
            return -ESRCH;

        rc = (op == XENMEM_current_reservation) ? d->tot_pages : d->max_pages;

        if ( unlikely(domid != DOMID_SELF) )
            put_domain(d);

        break;

    case XENMEM_translate_gpfn_list:
        progress = cmd >> MEMOP_EXTENT_SHIFT;
        rc = translate_gpfn_list(
            guest_handle_cast(arg, xen_translate_gpfn_list_t),
            &progress);
        if ( rc == -EAGAIN )
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh",
                op | (progress << MEMOP_EXTENT_SHIFT), arg);
        break;

    default:
        rc = arch_memory_op(op, arg);
        break;
    }

    return rc;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */