ia64/xen-unstable

xen/common/memory.c @ 12390:e28beea6d228

[IA64] Fix time services of EFI emulation

This patch serializes the execution of the following EFI runtime services:
- GetTime
- SetTime
- GetWakeTime
- SetWakeTime

Linux/ia64 uses similar spinlocks in the EFI RTC driver.

Signed-off-by: Masaki Kanno <kanno.masaki@jp.fujitsu.com>
author awilliam@xenbuild.aw
date Fri Nov 10 12:03:19 2006 -0700 (2006-11-10)
parents 5cdd4da17036
children a4ba47e9bc1f
/******************************************************************************
 * memory.c
 *
 * Code to handle memory-related requests.
 *
 * Copyright (c) 2003-2004, B Dragovic
 * Copyright (c) 2003-2005, K A Fraser
 */

#include <xen/config.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/perfc.h>
#include <xen/sched.h>
#include <xen/event.h>
#include <xen/shadow.h>
#include <xen/iocap.h>
#include <xen/guest_access.h>
#include <xen/errno.h>
#include <asm/current.h>
#include <asm/hardirq.h>
#include <public/memory.h>

/*
 * To allow safe resume of do_memory_op() after preemption, we need to know
 * at what point in the page list to resume. For this purpose I steal the
 * high-order bits of the @cmd parameter, which are otherwise unused and zero.
 */
#define START_EXTENT_SHIFT 4 /* cmd[:4] == start_extent */
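/*
 * For example, a continuation of XENMEM_populate_physmap that should resume
 * at extent N is re-issued as cmd = XENMEM_populate_physmap |
 * (N << START_EXTENT_SHIFT); do_memory_op() splits the two fields apart again.
 */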

struct memop_args {
    /* INPUT */
    struct domain *domain; /* Domain to be affected. */
    XEN_GUEST_HANDLE(xen_pfn_t) extent_list; /* List of extent base addrs. */
    unsigned int nr_extents; /* Number of extents to allocate or free. */
    unsigned int extent_order; /* Size of each extent. */
    unsigned int memflags; /* Allocation flags. */

    /* INPUT/OUTPUT */
    unsigned int nr_done; /* Number of extents processed so far. */
    int preempted; /* Was the hypercall preempted? */
};
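
/* Allocate on the CPU where the domain's VCPU0 runs (CPU0 if it has no VCPUs yet). */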
static unsigned int select_local_cpu(struct domain *d)
{
    struct vcpu *v = d->vcpu[0];
    return (v ? v->processor : 0);
}
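
/*
 * Allocate a->nr_extents extents of order a->extent_order to a->domain,
 * resuming at a->nr_done. The MFN of each new extent is copied back to the
 * guest's extent_list unless that handle is NULL.
 */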
static void increase_reservation(struct memop_args *a)
{
    struct page_info *page;
    unsigned long i;
    xen_pfn_t mfn;
    struct domain *d = a->domain;
    unsigned int cpu = select_local_cpu(d);

    if ( !guest_handle_is_null(a->extent_list) &&
         !guest_handle_okay(a->extent_list, a->nr_extents) )
        return;

    if ( (a->extent_order != 0) &&
         !multipage_allocation_permitted(current->domain) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        page = __alloc_domheap_pages(d, cpu, a->extent_order, a->memflags);
        if ( unlikely(page == NULL) )
        {
            gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
                     "id=%d memflags=%x (%ld of %d)\n",
                     a->extent_order, d->domain_id, a->memflags,
                     i, a->nr_extents);
            goto out;
        }

        /* Inform the domain of the new page's machine address. */
        if ( !guest_handle_is_null(a->extent_list) )
        {
            mfn = page_to_mfn(page);
            if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
                goto out;
        }
    }

 out:
    a->nr_done = i;
}
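
/*
 * Like increase_reservation(), but each extent is also mapped at the GPFN
 * the guest supplies in extent_list: via guest_physmap_add_page() for
 * translated guests, or by updating the M2P table and returning the MFN
 * for non-translated guests.
 */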
static void populate_physmap(struct memop_args *a)
{
    struct page_info *page;
    unsigned long i, j;
    xen_pfn_t gpfn, mfn;
    struct domain *d = a->domain;
    unsigned int cpu = select_local_cpu(d);

    if ( !guest_handle_okay(a->extent_list, a->nr_extents) )
        return;

    if ( (a->extent_order != 0) &&
         !multipage_allocation_permitted(current->domain) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) )
            goto out;

        page = __alloc_domheap_pages(d, cpu, a->extent_order, a->memflags);
        if ( unlikely(page == NULL) )
        {
            gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
                     "id=%d memflags=%x (%ld of %d)\n",
                     a->extent_order, d->domain_id, a->memflags,
                     i, a->nr_extents);
            goto out;
        }

        mfn = page_to_mfn(page);

        if ( unlikely(shadow_mode_translate(d)) )
        {
            for ( j = 0; j < (1 << a->extent_order); j++ )
                guest_physmap_add_page(d, gpfn + j, mfn + j);
        }
        else
        {
            for ( j = 0; j < (1 << a->extent_order); j++ )
                set_gpfn_from_mfn(mfn + j, gpfn + j);

            /* Inform the domain of the new page's machine address. */
            if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
                goto out;
        }
    }

 out:
    a->nr_done = i;
}
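
/*
 * Release the page backing guest frame @gmfn: drop its pin and allocation
 * references and remove it from @d's physmap. Returns 1 on success, 0 if the
 * GMFN does not map to a valid page owned by the domain.
 */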
int guest_remove_page(struct domain *d, unsigned long gmfn)
{
    struct page_info *page;
    unsigned long mfn;

    mfn = gmfn_to_mfn(d, gmfn);
    if ( unlikely(!mfn_valid(mfn)) )
    {
        gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n",
                 d->domain_id, gmfn);
        return 0;
    }

    page = mfn_to_page(mfn);
    if ( unlikely(!get_page(page, d)) )
    {
        gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);
        return 0;
    }

    if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
        put_page_and_type(page);

    if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
        put_page(page);

    if ( unlikely(!page_is_removable(page)) )
    {
        /* We'll make this a guest-visible error in future, so take heed! */
        gdprintk(XENLOG_INFO, "Dom%d freeing in-use page %lx (pseudophys %lx):"
                 " count=%lx type=%lx\n",
                 d->domain_id, mfn, get_gpfn_from_mfn(mfn),
                 (unsigned long)page->count_info, page->u.inuse.type_info);
    }

    guest_physmap_remove_page(d, gmfn, mfn);

    put_page(page);

    return 1;
}
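
/*
 * Free the extents named in the guest's extent_list, one page at a time via
 * guest_remove_page(). Resumes at a->nr_done and honours preemption like the
 * allocation paths above.
 */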
static void decrease_reservation(struct memop_args *a)
{
    unsigned long i, j;
    xen_pfn_t gmfn;

    if ( !guest_handle_okay(a->extent_list, a->nr_extents) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        if ( unlikely(__copy_from_guest_offset(&gmfn, a->extent_list, i, 1)) )
            goto out;

        for ( j = 0; j < (1 << a->extent_order); j++ )
            if ( !guest_remove_page(a->domain, gmfn + j) )
                goto out;
    }

 out:
    a->nr_done = i;
}
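
/*
 * Translate a guest-supplied list of GPFNs into MFNs for a translated-mode
 * domain. *progress records how many entries have been handled so that a
 * preempted call can be resumed (the function returns -EAGAIN in that case).
 */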
static long translate_gpfn_list(
    XEN_GUEST_HANDLE(xen_translate_gpfn_list_t) uop, unsigned long *progress)
{
    struct xen_translate_gpfn_list op;
    unsigned long i;
    xen_pfn_t gpfn;
    xen_pfn_t mfn;
    struct domain *d;

    if ( copy_from_guest(&op, uop, 1) )
        return -EFAULT;

    /* Is size too large for us to encode a continuation? */
    if ( op.nr_gpfns > (ULONG_MAX >> START_EXTENT_SHIFT) )
        return -EINVAL;

    if ( !guest_handle_okay(op.gpfn_list, op.nr_gpfns) ||
         !guest_handle_okay(op.mfn_list, op.nr_gpfns) )
        return -EFAULT;

    if ( op.domid == DOMID_SELF )
        op.domid = current->domain->domain_id;
    else if ( !IS_PRIV(current->domain) )
        return -EPERM;

    if ( (d = find_domain_by_id(op.domid)) == NULL )
        return -ESRCH;

    if ( !shadow_mode_translate(d) )
    {
        put_domain(d);
        return -EINVAL;
    }

    for ( i = *progress; i < op.nr_gpfns; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            put_domain(d);
            *progress = i;
            return -EAGAIN;
        }

        if ( unlikely(__copy_from_guest_offset(&gpfn, op.gpfn_list, i, 1)) )
        {
            put_domain(d);
            return -EFAULT;
        }

        mfn = gmfn_to_mfn(d, gpfn);

        if ( unlikely(__copy_to_guest_offset(op.mfn_list, i, &mfn, 1)) )
        {
            put_domain(d);
            return -EFAULT;
        }
    }

    put_domain(d);
    return 0;
}
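
/*
 * Exchange a set of the calling domain's existing pages for freshly allocated
 * ones, possibly of a different extent order or with different placement
 * constraints (e.g. below a given address width for DMA). The exchange is
 * processed in chunks: within a chunk it cannot fail partway, and between
 * chunks the hypercall may be preempted and continued.
 */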
static long memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
{
    struct xen_memory_exchange exch;
    LIST_HEAD(in_chunk_list);
    LIST_HEAD(out_chunk_list);
    unsigned long in_chunk_order, out_chunk_order;
    xen_pfn_t gpfn, gmfn, mfn;
    unsigned long i, j, k;
    unsigned int memflags = 0, cpu;
    long rc = 0;
    struct domain *d;
    struct page_info *page;

    if ( copy_from_guest(&exch, arg, 1) )
        return -EFAULT;

    /* Various sanity checks. */
    if ( (exch.nr_exchanged > exch.in.nr_extents) ||
         /* Input and output domain identifiers match? */
         (exch.in.domid != exch.out.domid) ||
         /* Sizes of input and output lists do not overflow a long? */
         ((~0UL >> exch.in.extent_order) < exch.in.nr_extents) ||
         ((~0UL >> exch.out.extent_order) < exch.out.nr_extents) ||
         /* Sizes of input and output lists match? */
         ((exch.in.nr_extents << exch.in.extent_order) !=
          (exch.out.nr_extents << exch.out.extent_order)) )
    {
        rc = -EINVAL;
        goto fail_early;
    }

    /* Only privileged guests can allocate multi-page contiguous extents. */
    if ( ((exch.in.extent_order != 0) || (exch.out.extent_order != 0)) &&
         !multipage_allocation_permitted(current->domain) )
    {
        rc = -EPERM;
        goto fail_early;
    }

    if ( (exch.out.address_bits != 0) &&
         (exch.out.address_bits <
          (get_order_from_pages(max_page) + PAGE_SHIFT)) )
    {
        if ( exch.out.address_bits < 31 )
        {
            rc = -ENOMEM;
            goto fail_early;
        }
        memflags = MEMF_dma;
    }

    if ( exch.in.extent_order <= exch.out.extent_order )
    {
        in_chunk_order = exch.out.extent_order - exch.in.extent_order;
        out_chunk_order = 0;
    }
    else
    {
        in_chunk_order = 0;
        out_chunk_order = exch.in.extent_order - exch.out.extent_order;
    }

    /*
     * Only support exchange on calling domain right now. Otherwise there are
     * tricky corner cases to consider (e.g., DOMF_dying domain).
     */
    if ( unlikely(exch.in.domid != DOMID_SELF) )
    {
        rc = IS_PRIV(current->domain) ? -EINVAL : -EPERM;
        goto fail_early;
    }
    d = current->domain;

    cpu = select_local_cpu(d);

    for ( i = (exch.nr_exchanged >> in_chunk_order);
          i < (exch.in.nr_extents >> in_chunk_order);
          i++ )
    {
        if ( hypercall_preempt_check() )
        {
            exch.nr_exchanged = i << in_chunk_order;
            if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
                return -EFAULT;
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh", XENMEM_exchange, arg);
        }

        /* Steal a chunk's worth of input pages from the domain. */
        for ( j = 0; j < (1UL << in_chunk_order); j++ )
        {
            if ( unlikely(__copy_from_guest_offset(
                &gmfn, exch.in.extent_start, (i<<in_chunk_order)+j, 1)) )
            {
                rc = -EFAULT;
                goto fail;
            }

            for ( k = 0; k < (1UL << exch.in.extent_order); k++ )
            {
                mfn = gmfn_to_mfn(d, gmfn + k);
                if ( unlikely(!mfn_valid(mfn)) )
                {
                    rc = -EINVAL;
                    goto fail;
                }

                page = mfn_to_page(mfn);

                if ( unlikely(steal_page(d, page, MEMF_no_refcount)) )
                {
                    rc = -EINVAL;
                    goto fail;
                }

                list_add(&page->list, &in_chunk_list);
            }
        }

        /* Allocate a chunk's worth of anonymous output pages. */
        for ( j = 0; j < (1UL << out_chunk_order); j++ )
        {
            page = __alloc_domheap_pages(
                NULL, cpu, exch.out.extent_order, memflags);
            if ( unlikely(page == NULL) )
            {
                rc = -ENOMEM;
                goto fail;
            }

            list_add(&page->list, &out_chunk_list);
        }

        /*
         * Success! Beyond this point we cannot fail for this chunk.
         */

        /* Destroy final reference to each input page. */
        while ( !list_empty(&in_chunk_list) )
        {
            page = list_entry(in_chunk_list.next, struct page_info, list);
            list_del(&page->list);
            if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
                BUG();
            mfn = page_to_mfn(page);
            guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn);
            put_page(page);
        }

        /* Assign each output page to the domain. */
        j = 0;
        while ( !list_empty(&out_chunk_list) )
        {
            page = list_entry(out_chunk_list.next, struct page_info, list);
            list_del(&page->list);
            if ( assign_pages(d, page, exch.out.extent_order,
                              MEMF_no_refcount) )
                BUG();

            /* Note that we ignore errors accessing the output extent list. */
            (void)__copy_from_guest_offset(
                &gpfn, exch.out.extent_start, (i<<out_chunk_order)+j, 1);

            mfn = page_to_mfn(page);
            if ( unlikely(shadow_mode_translate(d)) )
            {
                for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
                    guest_physmap_add_page(d, gpfn + k, mfn + k);
            }
            else
            {
                for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
                    set_gpfn_from_mfn(mfn + k, gpfn + k);
                (void)__copy_to_guest_offset(
                    exch.out.extent_start, (i<<out_chunk_order)+j, &mfn, 1);
            }

            j++;
        }
        BUG_ON(j != (1UL << out_chunk_order));
    }

    exch.nr_exchanged = exch.in.nr_extents;
    if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
        rc = -EFAULT;
    return rc;

    /*
     * Failed a chunk! Free any partial chunk work. Tell caller how many
     * chunks succeeded.
     */
 fail:
    /* Reassign any input pages we managed to steal. */
    while ( !list_empty(&in_chunk_list) )
    {
        page = list_entry(in_chunk_list.next, struct page_info, list);
        list_del(&page->list);
        if ( assign_pages(d, page, 0, MEMF_no_refcount) )
            BUG();
    }

    /* Free any output pages we managed to allocate. */
    while ( !list_empty(&out_chunk_list) )
    {
        page = list_entry(out_chunk_list.next, struct page_info, list);
        list_del(&page->list);
        free_domheap_pages(page, exch.out.extent_order);
    }

    exch.nr_exchanged = i << in_chunk_order;

 fail_early:
    if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
        rc = -EFAULT;
    return rc;
}
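
/*
 * Top-level dispatcher for the XENMEM_* hypercall. The low START_EXTENT_SHIFT
 * bits of @cmd select the sub-operation; the remaining bits carry the
 * continuation cursor (start_extent / progress) when a preempted call is
 * re-issued.
 */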
long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE(void) arg)
{
    struct domain *d;
    int rc, op;
    unsigned long start_extent, progress;
    struct xen_memory_reservation reservation;
    struct memop_args args;
    domid_t domid;

    op = cmd & ((1 << START_EXTENT_SHIFT) - 1);

    switch ( op )
    {
    case XENMEM_increase_reservation:
    case XENMEM_decrease_reservation:
    case XENMEM_populate_physmap:
        start_extent = cmd >> START_EXTENT_SHIFT;

        if ( copy_from_guest(&reservation, arg, 1) )
            return start_extent;

        /* Is size too large for us to encode a continuation? */
        if ( reservation.nr_extents > (ULONG_MAX >> START_EXTENT_SHIFT) )
            return start_extent;

        if ( unlikely(start_extent > reservation.nr_extents) )
            return start_extent;

        args.extent_list = reservation.extent_start;
        args.nr_extents = reservation.nr_extents;
        args.extent_order = reservation.extent_order;
        args.nr_done = start_extent;
        args.preempted = 0;
        args.memflags = 0;

        if ( (reservation.address_bits != 0) &&
             (reservation.address_bits <
              (get_order_from_pages(max_page) + PAGE_SHIFT)) )
        {
            if ( reservation.address_bits < 31 )
                return start_extent;
            args.memflags = MEMF_dma;
        }

        if ( likely(reservation.domid == DOMID_SELF) )
            d = current->domain;
        else if ( !IS_PRIV(current->domain) ||
                  ((d = find_domain_by_id(reservation.domid)) == NULL) )
            return start_extent;
        args.domain = d;

        switch ( op )
        {
        case XENMEM_increase_reservation:
            increase_reservation(&args);
            break;
        case XENMEM_decrease_reservation:
            decrease_reservation(&args);
            break;
        default: /* XENMEM_populate_physmap */
            populate_physmap(&args);
            break;
        }

        if ( unlikely(reservation.domid != DOMID_SELF) )
            put_domain(d);

        rc = args.nr_done;

        if ( args.preempted )
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh",
                op | (rc << START_EXTENT_SHIFT), arg);

        break;

    case XENMEM_exchange:
        rc = memory_exchange(guest_handle_cast(arg, xen_memory_exchange_t));
        break;

    case XENMEM_maximum_ram_page:
        rc = max_page;
        break;

    case XENMEM_current_reservation:
    case XENMEM_maximum_reservation:
        if ( copy_from_guest(&domid, arg, 1) )
            return -EFAULT;

        if ( likely(domid == DOMID_SELF) )
            d = current->domain;
        else if ( !IS_PRIV(current->domain) )
            return -EPERM;
        else if ( (d = find_domain_by_id(domid)) == NULL )
            return -ESRCH;

        rc = (op == XENMEM_current_reservation) ? d->tot_pages : d->max_pages;

        if ( unlikely(domid != DOMID_SELF) )
            put_domain(d);

        break;

    case XENMEM_translate_gpfn_list:
        progress = cmd >> START_EXTENT_SHIFT;
        rc = translate_gpfn_list(
            guest_handle_cast(arg, xen_translate_gpfn_list_t),
            &progress);
        if ( rc == -EAGAIN )
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh",
                op | (progress << START_EXTENT_SHIFT), arg);
        break;

    default:
        rc = arch_memory_op(op, arg);
        break;
    }

    return rc;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */