ia64/linux-2.6.18-xen.hg

view arch/ia64/xen/hypervisor.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently, if the balloon driver is unable to increase the guest's
reservation, it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However, it is possible that ballooning has in fact failed due to
memory pressure in the host, and it is therefore desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up, so there is temporary memory pressure
while things stabilise. You would not expect a well-behaved toolstack
to ask a domain to balloon to more than its allocation, nor would you
expect it to deliberately over-commit memory by setting balloon
targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents e410857fd83c
children
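The retry behaviour described above lives in the common balloon driver rather than in the file shown below. Purely as an illustration of the idea, here is a minimal C sketch; the state and helpers it uses (balloon_stats_sketch, alloc_pages_from_xen, schedule_balloon_retry) are hypothetical stand-ins, not the driver's real API.

/*
 * Illustrative sketch only -- not the actual balloon driver code.
 * The state and helpers below are hypothetical stand-ins for the
 * driver's real reservation and timer machinery.
 */
struct balloon_stats_sketch {
	unsigned long current_pages;	/* pages currently owned by the guest */
	unsigned long target_pages;	/* target set via xenstore */
};

static struct balloon_stats_sketch bs;

extern unsigned long alloc_pages_from_xen(unsigned long nr_pages);	/* hypothetical */
extern void schedule_balloon_retry(unsigned long delay_jiffies);	/* hypothetical */

static void try_balloon_up(unsigned long nr_wanted)
{
	/* Ask Xen for nr_wanted pages; under host memory pressure it may grant fewer. */
	unsigned long granted = alloc_pages_from_xen(nr_wanted);

	/* Keep whatever was granted, even on partial success. */
	bs.current_pages += granted;

	/*
	 * No "hard limit" is recorded any more: if we are still short of the
	 * target, retry later on a timer, just as the balloon-down path does,
	 * in case the host's memory pressure is only temporary.
	 */
	if (bs.current_pages < bs.target_pages)
		schedule_balloon_retry(HZ);	/* e.g. retry after about one second */
}

The point is that a shortfall no longer ends the operation: the driver keeps whatever it was granted and simply re-arms its timer until the target is met or changed.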
line source
1 /******************************************************************************
2 * arch/ia64/xen/hypervisor.c
3 *
4 * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
5 * VA Linux Systems Japan K.K.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 */
23 #include <linux/spinlock.h>
24 #include <linux/bootmem.h>
25 #include <linux/module.h>
26 #include <linux/vmalloc.h>
27 #include <linux/efi.h>
28 #include <asm/page.h>
29 #include <asm/pgalloc.h>
30 #include <asm/meminit.h>
31 #include <asm/hypervisor.h>
32 #include <asm/hypercall.h>
33 #include <xen/interface/memory.h>
34 #include <xen/xencons.h>
35 #include <xen/balloon.h>
37 shared_info_t *HYPERVISOR_shared_info __read_mostly =
38 (shared_info_t *)XSI_BASE;
39 EXPORT_SYMBOL(HYPERVISOR_shared_info);
41 start_info_t *xen_start_info;
42 EXPORT_SYMBOL(xen_start_info);
44 EXPORT_SYMBOL(running_on_xen);
46 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M
47 static int p2m_expose_init(void);
48 #else
49 #define p2m_expose_init() (-ENOSYS)
50 #define p2m_expose_resume() ((void)0)
51 #endif
53 EXPORT_SYMBOL(__hypercall);
55 void __init
56 xen_setup(char **cmdline_p)
57 {
58 struct dom0_vga_console_info *info;
59 extern void dig_setup(char **cmdline_p);
61 if (ia64_platform_is("xen"))
62 dig_setup(cmdline_p);
64 if (!is_running_on_xen() || !is_initial_xendomain())
65 return;
67 info = (void *)((char *)xen_start_info +
68 xen_start_info->console.dom0.info_off);
69 dom0_init_screen_info(info, xen_start_info->console.dom0.info_size);
71 xen_start_info->console.domU.mfn = 0;
72 xen_start_info->console.domU.evtchn = 0;
73 }
75 void __cpuinit
76 xen_cpu_init(void)
77 {
78 extern void xen_smp_intr_init(void);
79 xen_smp_intr_init();
80 }
82 /*
83 * __xen_create_contiguous_region(), __xen_destroy_contiguous_region()
84 * are based on i386 xen_create_contiguous_region(),
85 * xen_destroy_contiguous_region()
86 */
88 /* Protected by balloon_lock. */
89 #define MAX_CONTIG_ORDER 7
90 static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
92 /* Ensure multi-page extents are contiguous in machine memory. */
93 int
94 __xen_create_contiguous_region(unsigned long vstart,
95 unsigned int order, unsigned int address_bits)
96 {
97 unsigned long error = 0;
98 unsigned long gphys = __pa(vstart);
99 unsigned long start_gpfn = gphys >> PAGE_SHIFT;
100 unsigned long num_gpfn = 1 << order;
101 unsigned long i;
102 unsigned long flags;
104 unsigned long *in_frames = discontig_frames, out_frame;
105 int success;
106 struct xen_memory_exchange exchange = {
107 .in = {
108 .nr_extents = num_gpfn,
109 .extent_order = 0,
110 .domid = DOMID_SELF
111 },
112 .out = {
113 .nr_extents = 1,
114 .extent_order = order,
115 .address_bits = address_bits,
116 .domid = DOMID_SELF
117 },
118 .nr_exchanged = 0
119 };
121 if (unlikely(order > MAX_CONTIG_ORDER))
122 return -ENOMEM;
124 set_xen_guest_handle(exchange.in.extent_start, in_frames);
125 set_xen_guest_handle(exchange.out.extent_start, &out_frame);
127 scrub_pages(vstart, num_gpfn);
129 balloon_lock(flags);
131 /* Get a new contiguous memory extent. */
132 for (i = 0; i < num_gpfn; i++)
133 in_frames[i] = start_gpfn + i;
134 out_frame = start_gpfn;
135 error = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
136 success = (exchange.nr_exchanged == num_gpfn);
137 BUG_ON(!success && ((exchange.nr_exchanged != 0) || (error == 0)));
138 BUG_ON(success && (error != 0));
139 if (unlikely(error == -ENOSYS)) {
140 /* Compatibility when XENMEM_exchange is unsupported. */
141 error = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
142 &exchange.in);
143 BUG_ON(error != num_gpfn);
144 error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
145 &exchange.out);
146 if (error != 1) {
147 /* Couldn't get special memory: fall back to normal. */
148 for (i = 0; i < num_gpfn; i++) {
149 in_frames[i] = start_gpfn + i;
150 }
151 error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
152 &exchange.in);
153 BUG_ON(error != num_gpfn);
154 success = 0;
155 } else
156 success = 1;
157 }
158 #if 0
159 if (success) {
160 unsigned long mfn;
161 unsigned long mfn_prev = ~0UL;
162 for (i = 0; i < num_gpfn; i++) {
163 mfn = pfn_to_mfn_for_dma(start_gpfn + i);
164 if (mfn_prev != ~0UL && mfn != mfn_prev + 1) {
165 xprintk("\n");
166 xprintk("%s:%d order %d "
167 "start 0x%lx bus 0x%lx "
168 "machine 0x%lx\n",
169 __func__, __LINE__, order,
170 vstart, virt_to_bus((void*)vstart),
171 phys_to_machine_for_dma(gphys));
172 xprintk("mfn: ");
173 for (i = 0; i < num_gpfn; i++) {
174 mfn = pfn_to_mfn_for_dma(
175 start_gpfn + i);
176 xprintk("0x%lx ", mfn);
177 }
178 xprintk("\n");
179 break;
180 }
181 mfn_prev = mfn;
182 }
183 }
184 #endif
185 balloon_unlock(flags);
186 return success? 0: -ENOMEM;
187 }
189 void
190 __xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
191 {
192 unsigned long flags;
193 unsigned long error = 0;
194 unsigned long start_gpfn = __pa(vstart) >> PAGE_SHIFT;
195 unsigned long num_gpfn = 1UL << order;
196 unsigned long i;
198 unsigned long *out_frames = discontig_frames, in_frame;
199 int success;
200 struct xen_memory_exchange exchange = {
201 .in = {
202 .nr_extents = 1,
203 .extent_order = order,
204 .domid = DOMID_SELF
205 },
206 .out = {
207 .nr_extents = num_gpfn,
208 .extent_order = 0,
209 .address_bits = 0,
210 .domid = DOMID_SELF
211 },
212 .nr_exchanged = 0
213 };
216 if (unlikely(order > MAX_CONTIG_ORDER))
217 return;
219 set_xen_guest_handle(exchange.in.extent_start, &in_frame);
220 set_xen_guest_handle(exchange.out.extent_start, out_frames);
222 scrub_pages(vstart, num_gpfn);
224 balloon_lock(flags);
226 /* Do the exchange for non-contiguous MFNs. */
227 in_frame = start_gpfn;
228 for (i = 0; i < num_gpfn; i++)
229 out_frames[i] = start_gpfn + i;
230 error = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
231 success = (exchange.nr_exchanged == 1);
232 BUG_ON(!success && ((exchange.nr_exchanged != 0) || (error == 0)));
233 BUG_ON(success && (error != 0));
234 if (unlikely(error == -ENOSYS)) {
235 /* Compatibility when XENMEM_exchange is unsupported. */
236 error = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
237 &exchange.in);
238 BUG_ON(error != 1);
240 error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
241 &exchange.out);
242 BUG_ON(error != num_gpfn);
243 }
244 balloon_unlock(flags);
245 }
247 int
248 xen_limit_pages_to_max_mfn(struct page *pages, unsigned int order,
249 unsigned int address_bits)
250 {
251 return xen_create_contiguous_region((unsigned long)page_address(pages),
252 order, address_bits);
253 }
255 /****************************************************************************
256 * grant table hack
257 * cmd: GNTTABOP_xxx
258 */
259 #include <linux/mm.h>
260 #include <xen/interface/xen.h>
261 #include <xen/gnttab.h>
263 void *arch_gnttab_alloc_shared(unsigned long *frames)
264 {
265 return __va(frames[0] << PAGE_SHIFT);
266 }
268 static void
269 gnttab_map_grant_ref_pre(struct gnttab_map_grant_ref *uop)
270 {
271 uint32_t flags;
273 flags = uop->flags;
275 if (flags & GNTMAP_host_map) {
276 if (flags & GNTMAP_application_map) {
277 xprintd("GNTMAP_application_map is not supported yet:"
278 " flags 0x%x\n", flags);
279 BUG();
280 }
281 if (flags & GNTMAP_contains_pte) {
282 xprintd("GNTMAP_contains_pte is not supported yet"
283 " flags 0x%x\n", flags);
284 BUG();
285 }
286 } else if (flags & GNTMAP_device_map) {
287 xprintd("GNTMAP_device_map is not supported yet 0x%x\n",
288 flags);
289 BUG(); /* XXX not yet. actually this flag is not used. */
290 } else {
291 BUG();
292 }
293 }
295 int
296 HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
297 {
298 if (cmd == GNTTABOP_map_grant_ref) {
299 unsigned int i;
300 for (i = 0; i < count; i++) {
301 gnttab_map_grant_ref_pre(
302 (struct gnttab_map_grant_ref*)uop + i);
303 }
304 }
305 return xencomm_hypercall_grant_table_op(cmd, uop, count);
306 }
307 EXPORT_SYMBOL(HYPERVISOR_grant_table_op);
309 /**************************************************************************
310 * foreign mapping
311 */
312 #include <linux/efi.h>
313 #include <asm/meminit.h> /* for IA64_GRANULE_SIZE, GRANULEROUND{UP,DOWN}() */
315 static unsigned long privcmd_resource_min = 0;
316 /* Xen/ia64 can currently handle pseudo-physical addresses of up to
317 * (PAGE_SHIFT * 3) bits. */
318 static unsigned long privcmd_resource_max =
319 GRANULEROUNDDOWN((1UL << (PAGE_SHIFT * 3)) - 1);
320 static unsigned long privcmd_resource_align = IA64_GRANULE_SIZE;
322 static unsigned long
323 md_end_addr(const efi_memory_desc_t *md)
324 {
325 return md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
326 }
328 #define XEN_IA64_PRIVCMD_LEAST_GAP_SIZE (1024 * 1024 * 1024UL)
329 static int
330 xen_ia64_privcmd_check_size(unsigned long start, unsigned long end)
331 {
332 return (start < end &&
333 (end - start) > XEN_IA64_PRIVCMD_LEAST_GAP_SIZE);
334 }
336 static int __init
337 xen_ia64_privcmd_init(void)
338 {
339 void *efi_map_start, *efi_map_end, *p;
340 u64 efi_desc_size;
341 efi_memory_desc_t *md;
342 unsigned long tmp_min;
343 unsigned long tmp_max;
344 unsigned long gap_size;
345 unsigned long prev_end;
347 if (!is_running_on_xen())
348 return -1;
350 efi_map_start = __va(ia64_boot_param->efi_memmap);
351 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
352 efi_desc_size = ia64_boot_param->efi_memdesc_size;
354 /* First, check the highest used address. */
355 for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
356 /* nothing */;
357 }
358 md = p - efi_desc_size;
359 privcmd_resource_min = GRANULEROUNDUP(md_end_addr(md));
360 if (xen_ia64_privcmd_check_size(privcmd_resource_min,
361 privcmd_resource_max))
362 goto out;
364 /* The highest used address is too large;
365 * try to find the largest gap. */
366 tmp_min = privcmd_resource_max;
367 tmp_max = 0;
368 gap_size = 0;
369 prev_end = 0;
370 for (p = efi_map_start;
371 p < efi_map_end - efi_desc_size;
372 p += efi_desc_size) {
373 unsigned long end;
374 efi_memory_desc_t* next;
375 unsigned long next_start;
377 md = p;
378 end = md_end_addr(md);
379 if (end > privcmd_resource_max)
380 break;
381 if (end < prev_end) {
382 /* Workaround:
383 * Xen may pass incompletely sorted memory
384 * descriptors like
385 * [x, x + length]
386 * [x, x]
387 * This order should be reversed. */
388 continue;
389 }
390 next = p + efi_desc_size;
391 next_start = next->phys_addr;
392 if (next_start > privcmd_resource_max)
393 next_start = privcmd_resource_max;
394 if (end < next_start && gap_size < (next_start - end)) {
395 tmp_min = end;
396 tmp_max = next_start;
397 gap_size = tmp_max - tmp_min;
398 }
399 prev_end = end;
400 }
402 privcmd_resource_min = GRANULEROUNDUP(tmp_min);
403 if (xen_ia64_privcmd_check_size(privcmd_resource_min, tmp_max)) {
404 privcmd_resource_max = tmp_max;
405 goto out;
406 }
408 privcmd_resource_min = tmp_min;
409 privcmd_resource_max = tmp_max;
410 if (!xen_ia64_privcmd_check_size(privcmd_resource_min,
411 privcmd_resource_max)) {
412 /* No large enough gap was found.
413 * Go ahead anyway, with a warning, hoping that no large
414 * region will be requested. */
415 printk(KERN_WARNING "xen privcmd: "
416 "large enough region for privcmd mmap is not found.\n");
417 }
419 out:
420 printk(KERN_INFO "xen privcmd uses pseudo physical addr range "
421 "[0x%lx, 0x%lx] (%ldMB)\n",
422 privcmd_resource_min, privcmd_resource_max,
423 (privcmd_resource_max - privcmd_resource_min) >> 20);
424 BUG_ON(privcmd_resource_min >= privcmd_resource_max);
426 /* XXX this should be somewhere appropriate */
427 (void)p2m_expose_init();
429 return 0;
430 }
431 late_initcall(xen_ia64_privcmd_init);
433 struct xen_ia64_privcmd_entry {
434 atomic_t map_count;
435 #define INVALID_GPFN (~0UL)
436 unsigned long gpfn;
437 };
439 struct xen_ia64_privcmd_range {
440 atomic_t ref_count;
441 unsigned long pgoff; /* in PAGE_SIZE */
442 struct resource *res;
444 /* for foreign domain p2m mapping */
445 void *private;
446 void (*callback)(struct xen_ia64_privcmd_range *range, void *arg);
448 unsigned long num_entries;
449 struct xen_ia64_privcmd_entry entries[0];
450 };
452 struct xen_ia64_privcmd_vma {
453 int is_privcmd_mmapped;
454 struct xen_ia64_privcmd_range *range;
456 unsigned long num_entries;
457 struct xen_ia64_privcmd_entry *entries;
458 };
460 static void
461 xen_ia64_privcmd_init_entry(struct xen_ia64_privcmd_entry *entry)
462 {
463 atomic_set(&entry->map_count, 0);
464 entry->gpfn = INVALID_GPFN;
465 }
467 static int
468 xen_ia64_privcmd_entry_mmap(struct vm_area_struct *vma,
469 unsigned long addr,
470 struct xen_ia64_privcmd_range *privcmd_range,
471 int i,
472 unsigned long gmfn,
473 pgprot_t prot,
474 domid_t domid)
475 {
476 int error = 0;
477 struct xen_ia64_privcmd_entry *entry = &privcmd_range->entries[i];
478 unsigned long gpfn;
479 unsigned long flags;
481 if ((addr & ~PAGE_MASK) != 0 || gmfn == INVALID_MFN) {
482 error = -EINVAL;
483 goto out;
484 }
486 if (entry->gpfn != INVALID_GPFN) {
487 error = -EBUSY;
488 goto out;
489 }
490 gpfn = (privcmd_range->res->start >> PAGE_SHIFT) + i;
492 flags = ASSIGN_writable;
493 if (pgprot_val(prot) == PROT_READ)
494 flags = ASSIGN_readonly;
495 error = HYPERVISOR_add_physmap_with_gmfn(gpfn, gmfn, flags, domid);
496 if (error != 0)
497 goto out;
499 prot = vma->vm_page_prot;
500 error = remap_pfn_range(vma, addr, gpfn, 1 << PAGE_SHIFT, prot);
501 /*
502 * VM_PFNMAP is set in remap_pfn_range().
503 * Reset the flag to avoid BUG_ON() in do_no_page().
504 */
505 vma->vm_flags &= ~VM_PFNMAP;
507 if (error != 0) {
508 error = HYPERVISOR_zap_physmap(gpfn, 0);
509 if (error)
510 BUG(); /* XXX */
511 } else {
512 atomic_inc(&entry->map_count);
513 entry->gpfn = gpfn;
514 }
516 out:
517 return error;
518 }
520 static void
521 xen_ia64_privcmd_entry_munmap(struct xen_ia64_privcmd_range *privcmd_range,
522 int i)
523 {
524 struct xen_ia64_privcmd_entry *entry = &privcmd_range->entries[i];
525 unsigned long gpfn = entry->gpfn;
526 /* gpfn = (privcmd_range->res->start >> PAGE_SHIFT) +
527 (vma->vm_pgoff - privcmd_range->pgoff); */
528 int error;
530 error = HYPERVISOR_zap_physmap(gpfn, 0);
531 if (error)
532 BUG(); /* XXX */
533 entry->gpfn = INVALID_GPFN;
534 }
536 static void
537 xen_ia64_privcmd_entry_open(struct xen_ia64_privcmd_range *privcmd_range,
538 int i)
539 {
540 struct xen_ia64_privcmd_entry *entry = &privcmd_range->entries[i];
541 if (entry->gpfn != INVALID_GPFN)
542 atomic_inc(&entry->map_count);
543 else
544 BUG_ON(atomic_read(&entry->map_count) != 0);
545 }
547 static void
548 xen_ia64_privcmd_entry_close(struct xen_ia64_privcmd_range *privcmd_range,
549 int i)
550 {
551 struct xen_ia64_privcmd_entry *entry = &privcmd_range->entries[i];
552 if (entry->gpfn != INVALID_GPFN &&
553 atomic_dec_and_test(&entry->map_count))
554 xen_ia64_privcmd_entry_munmap(privcmd_range, i);
555 }
557 static void xen_ia64_privcmd_vma_open(struct vm_area_struct *vma);
558 static void xen_ia64_privcmd_vma_close(struct vm_area_struct *vma);
560 static struct page *
561 xen_ia64_privcmd_vma_nopage(struct vm_area_struct *vma,
562 unsigned long address,
563 int *type)
564 {
565 return NOPAGE_SIGBUS;
566 }
568 struct vm_operations_struct xen_ia64_privcmd_vm_ops = {
569 .open = xen_ia64_privcmd_vma_open,
570 .close = xen_ia64_privcmd_vma_close,
571 .nopage = xen_ia64_privcmd_vma_nopage
572 };
574 static void
575 __xen_ia64_privcmd_vma_open(struct vm_area_struct *vma,
576 struct xen_ia64_privcmd_vma *privcmd_vma,
577 struct xen_ia64_privcmd_range *privcmd_range)
578 {
579 unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
580 unsigned long num_entries =
581 (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
582 unsigned long i;
584 BUG_ON(entry_offset < 0);
585 BUG_ON(entry_offset + num_entries > privcmd_range->num_entries);
587 privcmd_vma->range = privcmd_range;
588 privcmd_vma->num_entries = num_entries;
589 privcmd_vma->entries = &privcmd_range->entries[entry_offset];
590 vma->vm_private_data = privcmd_vma;
591 for (i = 0; i < privcmd_vma->num_entries; i++)
592 xen_ia64_privcmd_entry_open(privcmd_range, entry_offset + i);
594 vma->vm_private_data = privcmd_vma;
595 vma->vm_ops = &xen_ia64_privcmd_vm_ops;
596 }
598 static void
599 xen_ia64_privcmd_vma_open(struct vm_area_struct *vma)
600 {
601 struct xen_ia64_privcmd_vma *old_privcmd_vma =
602 (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
603 struct xen_ia64_privcmd_vma *privcmd_vma =
604 (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
605 struct xen_ia64_privcmd_range *privcmd_range = privcmd_vma->range;
607 atomic_inc(&privcmd_range->ref_count);
608 /* vm_op->open() can't fail. */
609 privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL | __GFP_NOFAIL);
610 /* copy original value if necessary */
611 privcmd_vma->is_privcmd_mmapped = old_privcmd_vma->is_privcmd_mmapped;
613 __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range);
614 }
616 static void
617 xen_ia64_privcmd_vma_close(struct vm_area_struct *vma)
618 {
619 struct xen_ia64_privcmd_vma *privcmd_vma =
620 (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
621 struct xen_ia64_privcmd_range *privcmd_range = privcmd_vma->range;
622 unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
623 unsigned long i;
625 for (i = 0; i < privcmd_vma->num_entries; i++) {
626 xen_ia64_privcmd_entry_close(privcmd_range, entry_offset + i);
627 cond_resched();
628 }
629 vma->vm_private_data = NULL;
630 kfree(privcmd_vma);
632 if (atomic_dec_and_test(&privcmd_range->ref_count)) {
633 #if 1
634 for (i = 0; i < privcmd_range->num_entries; i++) {
635 struct xen_ia64_privcmd_entry *entry =
636 &privcmd_range->entries[i];
637 BUG_ON(atomic_read(&entry->map_count) != 0);
638 BUG_ON(entry->gpfn != INVALID_GPFN);
639 }
640 #endif
641 if (privcmd_range->callback)
642 (*privcmd_range->callback)(privcmd_range,
643 privcmd_range->private);
644 release_resource(privcmd_range->res);
645 kfree(privcmd_range->res);
646 vfree(privcmd_range);
647 }
648 }
650 int
651 privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
652 {
653 struct xen_ia64_privcmd_vma *privcmd_vma =
654 (struct xen_ia64_privcmd_vma *)vma->vm_private_data;
655 return (xchg(&privcmd_vma->is_privcmd_mmapped, 1) == 0);
656 }
658 int
659 privcmd_mmap(struct file * file, struct vm_area_struct * vma)
660 {
661 int error;
662 unsigned long size = vma->vm_end - vma->vm_start;
663 unsigned long num_entries = size >> PAGE_SHIFT;
664 struct xen_ia64_privcmd_range *privcmd_range = NULL;
665 struct xen_ia64_privcmd_vma *privcmd_vma = NULL;
666 struct resource *res = NULL;
667 unsigned long i;
668 BUG_ON(!is_running_on_xen());
670 BUG_ON(file->private_data != NULL);
672 error = -ENOMEM;
673 privcmd_range =
674 vmalloc(sizeof(*privcmd_range) +
675 sizeof(privcmd_range->entries[0]) * num_entries);
676 if (privcmd_range == NULL)
677 goto out_enomem0;
678 privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL);
679 if (privcmd_vma == NULL)
680 goto out_enomem1;
681 privcmd_vma->is_privcmd_mmapped = 0;
683 res = kzalloc(sizeof(*res), GFP_KERNEL);
684 if (res == NULL)
685 goto out_enomem1;
686 res->name = "Xen privcmd mmap";
687 error = allocate_resource(&iomem_resource, res, size,
688 privcmd_resource_min, privcmd_resource_max,
689 privcmd_resource_align, NULL, NULL);
690 if (error)
691 goto out_enomem1;
692 privcmd_range->res = res;
694 /* DONTCOPY is essential for Xen as copy_page_range is broken. */
695 vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
697 atomic_set(&privcmd_range->ref_count, 1);
698 privcmd_range->pgoff = vma->vm_pgoff;
699 privcmd_range->num_entries = num_entries;
700 privcmd_range->private = NULL;
701 privcmd_range->callback = NULL;
702 for (i = 0; i < privcmd_range->num_entries; i++)
703 xen_ia64_privcmd_init_entry(&privcmd_range->entries[i]);
705 __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range);
706 return 0;
708 out_enomem1:
709 kfree(res);
710 kfree(privcmd_vma);
711 out_enomem0:
712 vfree(privcmd_range);
713 return error;
714 }
716 int
717 direct_remap_pfn_range(struct vm_area_struct *vma,
718 unsigned long address, /* process virtual address */
719 unsigned long gmfn, /* gmfn, gmfn + 1, ... gmfn + size/PAGE_SIZE */
720 unsigned long size,
721 pgprot_t prot,
722 domid_t domid) /* target domain */
723 {
724 struct xen_ia64_privcmd_vma *privcmd_vma =
725 (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
726 struct xen_ia64_privcmd_range *privcmd_range = privcmd_vma->range;
727 unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
729 unsigned long i;
730 unsigned long offset;
731 int error = 0;
732 BUG_ON(!is_running_on_xen());
734 #if 0
735 if (prot != vm->vm_page_prot)
736 return -EINVAL;
737 #endif
739 i = (address - vma->vm_start) >> PAGE_SHIFT;
740 for (offset = 0; offset < size; offset += PAGE_SIZE) {
741 error = xen_ia64_privcmd_entry_mmap(vma, (address + offset) & PAGE_MASK, privcmd_range, entry_offset + i, gmfn, prot, domid);
742 if (error != 0)
743 break;
745 i++;
746 gmfn++;
747 }
749 return error;
750 }
753 /**************************************************************************
754 * expose p2m table
755 */
756 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M
757 #include <linux/cpu.h>
758 #include <asm/uaccess.h>
760 int p2m_initialized __read_mostly = 0;
762 unsigned long p2m_min_low_pfn __read_mostly;
763 unsigned long p2m_max_low_pfn __read_mostly;
764 unsigned long p2m_convert_min_pfn __read_mostly;
765 unsigned long p2m_convert_max_pfn __read_mostly;
767 static struct resource p2m_resource = {
768 .name = "Xen p2m table",
769 .flags = IORESOURCE_MEM,
770 };
771 static unsigned long p2m_assign_start_pfn __read_mostly;
772 static unsigned long p2m_assign_end_pfn __read_mostly;
773 static unsigned long p2m_expose_size; /* this is referenced only at resume time,
774 * so __read_mostly doesn't make sense.
775 */
776 volatile const pte_t *p2m_pte __read_mostly;
778 #define GRANULE_PFN PTRS_PER_PTE
779 static unsigned long p2m_granule_pfn __read_mostly = GRANULE_PFN;
781 #define ROUNDDOWN(x, y) ((x) & ~((y) - 1))
782 #define ROUNDUP(x, y) (((x) + (y) - 1) & ~((y) - 1))
784 #define P2M_PREFIX "Xen p2m: "
786 static int xen_ia64_p2m_expose __read_mostly = 1;
787 module_param(xen_ia64_p2m_expose, int, 0);
788 MODULE_PARM_DESC(xen_ia64_p2m_expose,
789 "enable/disable xen/ia64 p2m exposure optimization\n");
791 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
792 static int xen_ia64_p2m_expose_use_dtr __read_mostly = 1;
793 module_param(xen_ia64_p2m_expose_use_dtr, int, 0);
794 MODULE_PARM_DESC(xen_ia64_p2m_expose_use_dtr,
795 "use/unuse dtr to map exposed p2m table\n");
797 static const int p2m_page_shifts[] = {
798 _PAGE_SIZE_4K,
799 _PAGE_SIZE_8K,
800 _PAGE_SIZE_16K,
801 _PAGE_SIZE_64K,
802 _PAGE_SIZE_256K,
803 _PAGE_SIZE_1M,
804 _PAGE_SIZE_4M,
805 _PAGE_SIZE_16M,
806 _PAGE_SIZE_64M,
807 _PAGE_SIZE_256M,
808 };
810 struct p2m_itr_arg {
811 unsigned long vaddr;
812 unsigned long pteval;
813 unsigned long log_page_size;
814 };
815 static struct p2m_itr_arg p2m_itr_arg __read_mostly;
817 /* This should be in asm-ia64/kregs.h */
818 #define IA64_TR_P2M_TABLE 3
820 static void
821 p2m_itr(void *info)
822 {
823 struct p2m_itr_arg *arg = (struct p2m_itr_arg*)info;
824 ia64_itr(0x2, IA64_TR_P2M_TABLE,
825 arg->vaddr, arg->pteval, arg->log_page_size);
826 ia64_srlz_d();
827 }
829 static int
830 p2m_expose_dtr_call(struct notifier_block *self,
831 unsigned long event, void *ptr)
832 {
833 unsigned int cpu = (unsigned int)(long)ptr;
834 if (event != CPU_ONLINE)
835 return 0;
836 if (p2m_initialized && xen_ia64_p2m_expose_use_dtr) {
837 unsigned int me = get_cpu();
838 if (cpu == me)
839 p2m_itr(&p2m_itr_arg);
840 else
841 smp_call_function_single(cpu, &p2m_itr, &p2m_itr_arg,
842 1, 1);
843 put_cpu();
844 }
845 return 0;
846 }
848 static struct notifier_block p2m_expose_dtr_hotplug_notifier = {
849 .notifier_call = p2m_expose_dtr_call,
850 .next = NULL,
851 .priority = 0
852 };
853 #endif
855 static inline unsigned long
856 p2m_table_size(unsigned long num_pfn)
857 {
858 return ((num_pfn + PTRS_PER_PTE - 1) / PTRS_PER_PTE) << PAGE_SHIFT;
859 }
861 static int
862 p2m_expose_init(void)
863 {
864 unsigned long num_pfn;
865 unsigned long p2m_size = 0;
866 unsigned long align = ~0UL;
867 int error = 0;
868 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
869 int i;
870 unsigned long log_page_size = 0;
871 #endif
873 if (!xen_ia64_p2m_expose)
874 return -ENOSYS;
875 if (p2m_initialized)
876 return 0;
878 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
879 error = register_cpu_notifier(&p2m_expose_dtr_hotplug_notifier);
880 if (error < 0)
881 return error;
882 #endif
884 lock_cpu_hotplug();
885 if (p2m_initialized)
886 goto out;
888 #ifdef CONFIG_DISCONTIGMEM
889 p2m_min_low_pfn = min_low_pfn;
890 p2m_max_low_pfn = max_low_pfn;
891 #else
892 p2m_min_low_pfn = 0;
893 p2m_max_low_pfn = max_pfn;
894 #endif
896 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
897 if (xen_ia64_p2m_expose_use_dtr) {
898 unsigned long page_size = 0;
899 unsigned long granule_pfn = 0;
900 p2m_size = p2m_table_size(p2m_max_low_pfn - p2m_min_low_pfn);
901 for (i = 0;
902 i < sizeof(p2m_page_shifts)/sizeof(p2m_page_shifts[0]);
903 i++) {
904 log_page_size = p2m_page_shifts[i];
905 page_size = 1UL << log_page_size;
906 if (page_size < p2m_size)
907 continue;
909 granule_pfn = max(page_size >> PAGE_SHIFT,
910 p2m_granule_pfn);
911 p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn,
912 granule_pfn);
913 p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn,
914 granule_pfn);
915 num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn;
916 p2m_expose_size = num_pfn << PAGE_SHIFT;
917 p2m_size = p2m_table_size(num_pfn);
918 p2m_size = ROUNDUP(p2m_size,
919 granule_pfn << PAGE_SHIFT);
920 if (p2m_size == page_size)
921 break;
922 }
923 if (p2m_size != page_size) {
924 printk(KERN_ERR "p2m_size != page_size\n");
925 error = -EINVAL;
926 goto out;
927 }
928 align = max(privcmd_resource_align, granule_pfn << PAGE_SHIFT);
929 } else
930 #endif
931 {
932 BUG_ON(p2m_granule_pfn & (p2m_granule_pfn - 1));
933 p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn,
934 p2m_granule_pfn);
935 p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn,
936 p2m_granule_pfn);
937 num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn;
938 p2m_expose_size = num_pfn << PAGE_SHIFT;
939 p2m_size = p2m_table_size(num_pfn);
940 p2m_size = ROUNDUP(p2m_size, p2m_granule_pfn << PAGE_SHIFT);
941 align = max(privcmd_resource_align,
942 p2m_granule_pfn << PAGE_SHIFT);
943 }
945 /* use privcmd region */
946 error = allocate_resource(&iomem_resource, &p2m_resource, p2m_size,
947 privcmd_resource_min, privcmd_resource_max,
948 align, NULL, NULL);
949 if (error) {
950 printk(KERN_ERR P2M_PREFIX
951 "can't allocate region for p2m exposure "
952 "[0x%016lx, 0x%016lx] 0x%016lx\n",
953 p2m_convert_min_pfn, p2m_convert_max_pfn, p2m_size);
954 goto out;
955 }
957 p2m_assign_start_pfn = p2m_resource.start >> PAGE_SHIFT;
958 p2m_assign_end_pfn = p2m_resource.end >> PAGE_SHIFT;
960 error = HYPERVISOR_expose_p2m(p2m_convert_min_pfn,
961 p2m_assign_start_pfn,
962 p2m_expose_size, p2m_granule_pfn);
963 if (error) {
964 printk(KERN_ERR P2M_PREFIX "failed expose p2m hypercall %d\n",
965 error);
966 printk(KERN_ERR P2M_PREFIX "conv 0x%016lx assign 0x%016lx "
967 "expose_size 0x%016lx granule 0x%016lx\n",
968 p2m_convert_min_pfn, p2m_assign_start_pfn,
969 p2m_expose_size, p2m_granule_pfn);
970 release_resource(&p2m_resource);
971 goto out;
972 }
973 p2m_pte = (volatile const pte_t*)pfn_to_kaddr(p2m_assign_start_pfn);
974 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
975 if (xen_ia64_p2m_expose_use_dtr) {
976 p2m_itr_arg.vaddr = (unsigned long)__va(p2m_assign_start_pfn
977 << PAGE_SHIFT);
978 p2m_itr_arg.pteval = pte_val(pfn_pte(p2m_assign_start_pfn,
979 PAGE_KERNEL));
980 p2m_itr_arg.log_page_size = log_page_size;
981 smp_mb();
982 smp_call_function(&p2m_itr, &p2m_itr_arg, 1, 1);
983 p2m_itr(&p2m_itr_arg);
984 }
985 #endif
986 smp_mb();
987 p2m_initialized = 1;
988 printk(P2M_PREFIX "assign p2m table of [0x%016lx, 0x%016lx)\n",
989 p2m_convert_min_pfn << PAGE_SHIFT,
990 (p2m_convert_max_pfn << PAGE_SHIFT) + PAGE_SIZE);
991 printk(P2M_PREFIX "to [0x%016lx, 0x%016lx) (%ld KBytes)\n",
992 p2m_assign_start_pfn << PAGE_SHIFT,
993 (p2m_assign_end_pfn << PAGE_SHIFT) + PAGE_SIZE,
994 p2m_size / 1024);
995 out:
996 unlock_cpu_hotplug();
997 return error;
998 }
1000 #ifdef notyet
1001 void
1002 p2m_expose_cleanup(void)
1003 {
1004 BUG_ON(!p2m_initialized);
1005 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
1006 unregister_cpu_notifier(&p2m_expose_dtr_hotplug_notifier);
1007 #endif
1008 release_resource(&p2m_resource);
1009 }
1010 #endif
1012 static void
1013 p2m_expose_resume(void)
1014 {
1015 int error;
1017 if (!xen_ia64_p2m_expose || !p2m_initialized)
1018 return;
1020 /*
1021 * We can't call {lock, unlock}_cpu_hotplug() because
1022 * they require process context.
1023 * We don't need them because we are the only CPU running and
1024 * interrupts are masked during resume.
1025 */
1026 error = HYPERVISOR_expose_p2m(p2m_convert_min_pfn,
1027 p2m_assign_start_pfn,
1028 p2m_expose_size, p2m_granule_pfn);
1029 if (error) {
1030 printk(KERN_ERR P2M_PREFIX "failed expose p2m hypercall %d\n",
1031 error);
1032 printk(KERN_ERR P2M_PREFIX "conv 0x%016lx assign 0x%016lx "
1033 "expose_size 0x%016lx granule 0x%016lx\n",
1034 p2m_convert_min_pfn, p2m_assign_start_pfn,
1035 p2m_expose_size, p2m_granule_pfn);
1036 p2m_initialized = 0;
1037 smp_mb();
1038 ia64_ptr(0x2, p2m_itr_arg.vaddr, p2m_itr_arg.log_page_size);
1040 /*
1041 * We can't call those clean up functions because they
1042 * require process context.
1043 */
1044 #if 0
1045 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
1046 if (xen_ia64_p2m_expose_use_dtr)
1047 unregister_cpu_notifier(
1048 &p2m_expose_dtr_hotplug_notifier);
1049 #endif
1050 release_resource(&p2m_resource);
1051 #endif
1052 }
1053 }
1055 /* XXX inlinize? */
1056 unsigned long
1057 p2m_phystomach(unsigned long gpfn)
1058 {
1059 volatile const pte_t *pte;
1060 unsigned long mfn;
1061 unsigned long pteval;
1063 if (!p2m_initialized ||
1064 gpfn < p2m_min_low_pfn || gpfn > p2m_max_low_pfn
1065 /* || !pfn_valid(gpfn) */)
1066 return INVALID_MFN;
1067 pte = p2m_pte + (gpfn - p2m_convert_min_pfn);
1069 mfn = INVALID_MFN;
1070 if (likely(__get_user(pteval, (unsigned long __user *)pte) == 0 &&
1071 pte_present(__pte(pteval)) &&
1072 pte_pfn(__pte(pteval)) != (INVALID_MFN >> PAGE_SHIFT)))
1073 mfn = (pteval & _PFN_MASK) >> PAGE_SHIFT;
1075 return mfn;
1076 }
1078 EXPORT_SYMBOL_GPL(p2m_initialized);
1079 EXPORT_SYMBOL_GPL(p2m_min_low_pfn);
1080 EXPORT_SYMBOL_GPL(p2m_max_low_pfn);
1081 EXPORT_SYMBOL_GPL(p2m_convert_min_pfn);
1082 EXPORT_SYMBOL_GPL(p2m_convert_max_pfn);
1083 EXPORT_SYMBOL_GPL(p2m_pte);
1084 EXPORT_SYMBOL_GPL(p2m_phystomach);
1086 /**************************************************************************
1087 * foreign domain p2m mapping
1088 */
1089 #include <asm/xen/xencomm.h>
1090 #include <xen/public/privcmd.h>
1092 struct foreign_p2m_private {
1093 unsigned long gpfn;
1094 domid_t domid;
1095 };
1097 static void
1098 xen_foreign_p2m_unexpose(struct xen_ia64_privcmd_range *privcmd_range,
1099 void *arg)
1100 {
1101 struct foreign_p2m_private *private = (struct foreign_p2m_private*)arg;
1102 int ret;
1104 privcmd_range->private = NULL;
1105 privcmd_range->callback = NULL;
1107 ret = HYPERVISOR_unexpose_foreign_p2m(private->gpfn, private->domid);
1108 if (ret)
1109 printk(KERN_WARNING
1110 "unexpose_foreign_p2m hypercall failed.\n");
1111 kfree(private);
1112 }
1114 int
1115 xen_foreign_p2m_expose(privcmd_hypercall_t *hypercall)
1116 {
1117 /*
1118 * hypercall->
1119 * arg0: cmd = IA64_DOM0VP_expose_foreign_p2m
1120 * arg1: va
1121 * arg2: domid
1122 * arg3: __user* memmap_info
1123 * arg4: flags
1124 */
1126 int ret = 0;
1127 struct mm_struct *mm = current->mm;
1129 unsigned long vaddr = hypercall->arg[1];
1130 domid_t domid = hypercall->arg[2];
1131 struct xen_ia64_memmap_info __user *u_memmap_info =
1132 (struct xen_ia64_memmap_info __user *)hypercall->arg[3];
1134 struct xen_ia64_memmap_info memmap_info;
1135 size_t memmap_size;
1136 struct xen_ia64_memmap_info *k_memmap_info = NULL;
1137 unsigned long max_gpfn;
1138 unsigned long p2m_size;
1139 struct resource *res;
1140 unsigned long gpfn;
1142 struct vm_area_struct *vma;
1143 void *p;
1144 unsigned long prev_src_gpfn_end;
1146 struct xen_ia64_privcmd_vma *privcmd_vma;
1147 struct xen_ia64_privcmd_range *privcmd_range;
1148 struct foreign_p2m_private *private = NULL;
1150 BUG_ON(hypercall->arg[0] != IA64_DOM0VP_expose_foreign_p2m);
1152 private = kmalloc(sizeof(*private), GFP_KERNEL);
1153 if (private == NULL)
1154 goto kfree_out;
1156 if (copy_from_user(&memmap_info, u_memmap_info, sizeof(memmap_info)))
1157 return -EFAULT;
1158 /* memmap_info integrity check */
1159 if (memmap_info.efi_memdesc_size < sizeof(efi_memory_desc_t) ||
1160 memmap_info.efi_memmap_size < memmap_info.efi_memdesc_size ||
1161 (memmap_info.efi_memmap_size % memmap_info.efi_memdesc_size)
1162 != 0) {
1163 ret = -EINVAL;
1164 goto kfree_out;
1165 }
1167 memmap_size = sizeof(*k_memmap_info) + memmap_info.efi_memmap_size;
1168 k_memmap_info = kmalloc(memmap_size, GFP_KERNEL);
1169 if (k_memmap_info == NULL)
1170 return -ENOMEM;
1171 if (copy_from_user(k_memmap_info, u_memmap_info, memmap_size)) {
1172 ret = -EFAULT;
1173 goto kfree_out;
1174 }
1175 /* k_memmap_info integrity check is done by the expose foreign p2m
1176 hypercall */
1178 max_gpfn = HYPERVISOR_memory_op(XENMEM_maximum_gpfn, &domid);
1179 if (max_gpfn < 0) {
1180 ret = max_gpfn;
1181 goto kfree_out;
1182 }
1183 p2m_size = p2m_table_size(max_gpfn + 1);
1185 down_write(&mm->mmap_sem);
1187 vma = find_vma(mm, vaddr);
1188 if (vma == NULL || vma->vm_ops != &xen_ia64_privcmd_vm_ops ||
1189 vaddr != vma->vm_start ||
1190 (vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_EXEC) ||
1191 !privcmd_enforce_singleshot_mapping(vma))
1192 goto mmap_out;
1194 privcmd_vma = (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
1195 res = privcmd_vma->range->res;
1196 if (p2m_size > (res->end - res->start + 1) ||
1197 p2m_size > vma->vm_end - vma->vm_start) {
1198 ret = -EINVAL;
1199 goto mmap_out;
1200 }
1202 gpfn = res->start >> PAGE_SHIFT;
1203 /*
1204 * arg0: dest_gpfn
1205 * arg1: domid
1206 * arg2: XEN_GUEST_HANDLE(char) buffer: memmap_info
1207 * arg3: flags
1208 * The hypercall checks its integrity, simplifies it and
1209 * copies it back for us.
1210 */
1211 ret = xencomm_arch_expose_foreign_p2m(gpfn, domid,
1212 xencomm_map_no_alloc(k_memmap_info, memmap_size),
1213 hypercall->arg[4]);
1214 if (ret)
1215 goto mmap_out;
1217 privcmd_range = (struct xen_ia64_privcmd_range*)privcmd_vma->range;
1218 prev_src_gpfn_end = 0;
1219 for (p = k_memmap_info->memdesc;
1220 p < (void*)&k_memmap_info->memdesc[0] +
1221 k_memmap_info->efi_memmap_size;
1222 p += k_memmap_info->efi_memdesc_size) {
1223 efi_memory_desc_t* md = p;
1224 unsigned long src_gpfn = md->phys_addr >> PAGE_SHIFT;
1225 unsigned long src_gpfn_end =
1226 (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >>
1227 PAGE_SHIFT;
1228 unsigned long num_src_gpfn;
1229 unsigned long gpfn_offset;
1230 unsigned long size;
1231 unsigned int i;
1233 if (src_gpfn <= prev_src_gpfn_end)
1234 src_gpfn = prev_src_gpfn_end + 1;
1235 if (src_gpfn_end <= prev_src_gpfn_end)
1236 continue;
1238 src_gpfn &= ~(PTRS_PER_PTE - 1);
1239 src_gpfn_end = (src_gpfn_end + PTRS_PER_PTE - 1) &
1240 ~(PTRS_PER_PTE - 1);
1241 num_src_gpfn = src_gpfn_end - src_gpfn;
1242 gpfn_offset = src_gpfn / PTRS_PER_PTE;
1243 size = p2m_table_size(num_src_gpfn);
1245 prev_src_gpfn_end = src_gpfn_end;
1246 ret = remap_pfn_range(vma,
1247 vaddr + (gpfn_offset << PAGE_SHIFT),
1248 gpfn + gpfn_offset, size,
1249 vma->vm_page_prot);
1250 if (ret) {
1251 for (i = 0; i < gpfn + gpfn_offset; i++) {
1252 struct xen_ia64_privcmd_entry *entry =
1253 &privcmd_range->entries[i];
1254 BUG_ON(atomic_read(&entry->map_count) != 1 &&
1255 atomic_read(&entry->map_count) != 0);
1256 atomic_set(&entry->map_count, 0);
1257 entry->gpfn = INVALID_GPFN;
1258 }
1259 (void)HYPERVISOR_unexpose_foreign_p2m(gpfn, domid);
1260 goto mmap_out;
1261 }
1263 for (i = gpfn_offset;
1264 i < gpfn_offset + (size >> PAGE_SHIFT);
1265 i++) {
1266 struct xen_ia64_privcmd_entry *entry =
1267 &privcmd_range->entries[i];
1268 BUG_ON(atomic_read(&entry->map_count) != 0);
1269 BUG_ON(entry->gpfn != INVALID_GPFN);
1270 atomic_inc(&entry->map_count);
1271 entry->gpfn = gpfn + i;
1272 }
1273 }
1275 private->gpfn = gpfn;
1276 private->domid = domid;
1278 privcmd_range->callback = &xen_foreign_p2m_unexpose;
1279 privcmd_range->private = private;
1281 mmap_out:
1282 up_write(&mm->mmap_sem);
1283 kfree_out:
1284 kfree(k_memmap_info);
1285 if (ret != 0)
1286 kfree(private);
1287 return ret;
1288 }
1289 #endif
1291 /**************************************************************************
1292 * for xenoprof
1293 */
1294 struct resource*
1295 xen_ia64_allocate_resource(unsigned long size)
1296 {
1297 struct resource *res;
1298 int error;
1300 res = kzalloc(sizeof(*res), GFP_KERNEL);
1301 if (res == NULL)
1302 return ERR_PTR(-ENOMEM);
1304 res->name = "Xen";
1305 res->flags = IORESOURCE_MEM;
1306 error = allocate_resource(&iomem_resource, res, PAGE_ALIGN(size),
1307 privcmd_resource_min, privcmd_resource_max,
1308 IA64_GRANULE_SIZE, NULL, NULL);
1309 if (error) {
1310 kfree(res);
1311 return ERR_PTR(error);
1312 }
1313 return res;
1314 }
1315 EXPORT_SYMBOL_GPL(xen_ia64_allocate_resource);
1317 void
1318 xen_ia64_release_resource(struct resource *res)
1319 {
1320 release_resource(res);
1321 kfree(res);
1322 }
1323 EXPORT_SYMBOL_GPL(xen_ia64_release_resource);
1325 void
1326 xen_ia64_unmap_resource(struct resource *res)
1327 {
1328 unsigned long gpfn = res->start >> PAGE_SHIFT;
1329 unsigned long nr_pages = (res->end - res->start) >> PAGE_SHIFT;
1330 unsigned long i;
1332 for (i = 0; i < nr_pages; i++) {
1333 int error = HYPERVISOR_zap_physmap(gpfn + i, 0);
1334 if (error)
1335 printk(KERN_ERR
1336 "%s:%d zap_phsymap failed %d gpfn %lx\n",
1337 __func__, __LINE__, error, gpfn + i);
1339 xen_ia64_release_resource(res);
1341 EXPORT_SYMBOL_GPL(xen_ia64_unmap_resource);
1343 /**************************************************************************
1344 * opt feature
1345 */
1346 void
1347 xen_ia64_enable_opt_feature(void)
1348 {
1349 /* Enable region 7 identity map optimizations in Xen */
1350 struct xen_ia64_opt_feature optf;
1352 optf.cmd = XEN_IA64_OPTF_IDENT_MAP_REG7;
1353 optf.on = XEN_IA64_OPTF_ON;
1354 optf.pgprot = pgprot_val(PAGE_KERNEL);
1355 optf.key = 0; /* No key on linux. */
1356 HYPERVISOR_opt_feature(&optf);
1357 }
1359 /**************************************************************************
1360 * suspend/resume
1361 */
1362 void
1363 xen_post_suspend(int suspend_cancelled)
1364 {
1365 if (suspend_cancelled)
1366 return;
1368 p2m_expose_resume();
1369 xen_ia64_enable_opt_feature();
1370 /* add more if necessary */
1371 }