ia64/xen-unstable: linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c @ 11729:3e9fcbee3c09

[IA64] fix sparse tree build with p2m exposure disabled

Signed-off-by: Alex Williamson <alex.williamson@hp.com>

author    awilliam@xenbuild.aw
date      Wed Oct 04 22:13:06 2006 -0600
parents   d1d9f3f6ca09
children  b725c9e51a7c

/******************************************************************************
 * arch/ia64/xen/hypervisor.c
 *
 * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
 *                    VA Linux Systems Japan K.K.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

//#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/bootmem.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <asm/page.h>
#include <asm/hypervisor.h>
#include <asm/hypercall.h>
#include <xen/interface/memory.h>
#include <xen/balloon.h>

shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)XSI_BASE;
EXPORT_SYMBOL(HYPERVISOR_shared_info);

start_info_t *xen_start_info;
EXPORT_SYMBOL(xen_start_info);

int running_on_xen;
EXPORT_SYMBOL(running_on_xen);

#ifdef CONFIG_XEN_IA64_EXPOSE_P2M
static int p2m_expose_init(void);
#else
#define p2m_expose_init() (-ENOSYS)
#endif

//XXX same as i386, x86_64 contiguous_bitmap_set(), contiguous_bitmap_clear()
//    move those to lib/contiguous_bitmap?
//XXX discontigmem/sparsemem

/*
 * Bitmap is indexed by page number. If bit is set, the page is part of a
 * xen_create_contiguous_region() area of memory.
 */
unsigned long *contiguous_bitmap;

void
contiguous_bitmap_init(unsigned long end_pfn)
{
        unsigned long size = (end_pfn + 2 * BITS_PER_LONG) >> 3;
        contiguous_bitmap = alloc_bootmem_low_pages(size);
        BUG_ON(!contiguous_bitmap);
        memset(contiguous_bitmap, 0, size);
}
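
/*
 * Editor's note (illustrative sketch, not part of the original code): the
 * bitmap needs one bit per pfn, hence roughly end_pfn / 8 bytes; the extra
 * 2 * BITS_PER_LONG pfns of slack appear intended to keep the word-granular
 * set/clear helpers below from indexing past the end of the allocation.
 * Worked arithmetic for a hypothetical end_pfn, with BITS_PER_LONG == 64:
 */
#if 0   /* worked example only */
static void contiguous_bitmap_size_example(void)
{
        unsigned long end_pfn = 0x10000;        /* hypothetical value */
        unsigned long size = (end_pfn + 2 * BITS_PER_LONG) >> 3;
        /* size == (65536 + 128) / 8 == 8208 bytes */
        BUG_ON(size != 8208);
}
#endif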

#if 0
int
contiguous_bitmap_test(void* p)
{
        return test_bit(__pa(p) >> PAGE_SHIFT, contiguous_bitmap);
}
#endif

static void contiguous_bitmap_set(
        unsigned long first_page, unsigned long nr_pages)
{
        unsigned long start_off, end_off, curr_idx, end_idx;

        curr_idx = first_page / BITS_PER_LONG;
        start_off = first_page & (BITS_PER_LONG-1);
        end_idx = (first_page + nr_pages) / BITS_PER_LONG;
        end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);

        if (curr_idx == end_idx) {
                contiguous_bitmap[curr_idx] |=
                        ((1UL<<end_off)-1) & -(1UL<<start_off);
        } else {
                contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
                while ( ++curr_idx < end_idx )
                        contiguous_bitmap[curr_idx] = ~0UL;
                contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
        }
}
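
/*
 * Editor's note (worked example, not part of the original code): in the
 * single-word case above, ((1UL<<end_off)-1) selects the bits below end_off
 * and -(1UL<<start_off) selects the bits at or above start_off, so their AND
 * marks exactly [first_page, first_page + nr_pages).  With BITS_PER_LONG == 64:
 */
#if 0   /* worked example only */
static void contiguous_bitmap_mask_example(void)
{
        unsigned long first_page = 3, nr_pages = 5;     /* hypothetical */
        unsigned long start_off = first_page & (BITS_PER_LONG-1);             /* 3 */
        unsigned long end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);  /* 8 */
        unsigned long mask = ((1UL<<end_off)-1) & -(1UL<<start_off);
        BUG_ON(mask != 0xf8UL);         /* bits 3..7, i.e. pages 3..7 */
}
#endif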

static void contiguous_bitmap_clear(
        unsigned long first_page, unsigned long nr_pages)
{
        unsigned long start_off, end_off, curr_idx, end_idx;

        curr_idx = first_page / BITS_PER_LONG;
        start_off = first_page & (BITS_PER_LONG-1);
        end_idx = (first_page + nr_pages) / BITS_PER_LONG;
        end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);

        if (curr_idx == end_idx) {
                contiguous_bitmap[curr_idx] &=
                        -(1UL<<end_off) | ((1UL<<start_off)-1);
        } else {
                contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
                while ( ++curr_idx != end_idx )
                        contiguous_bitmap[curr_idx] = 0;
                contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
        }
}

// __xen_create_contiguous_region(), __xen_destroy_contiguous_region()
// are based on i386 xen_create_contiguous_region(),
// xen_destroy_contiguous_region()

/* Protected by balloon_lock. */
#define MAX_CONTIG_ORDER 7
static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];

/* Ensure multi-page extents are contiguous in machine memory. */
int
__xen_create_contiguous_region(unsigned long vstart,
                               unsigned int order, unsigned int address_bits)
{
        unsigned long error = 0;
        unsigned long gphys = __pa(vstart);
        unsigned long start_gpfn = gphys >> PAGE_SHIFT;
        unsigned long num_gpfn = 1 << order;
        unsigned long i;
        unsigned long flags;

        unsigned long *in_frames = discontig_frames, out_frame;
        int success;
        struct xen_memory_exchange exchange = {
                .in = {
                        .nr_extents = num_gpfn,
                        .extent_order = 0,
                        .domid = DOMID_SELF
                },
                .out = {
                        .nr_extents = 1,
                        .extent_order = order,
                        .address_bits = address_bits,
                        .domid = DOMID_SELF
                },
                .nr_exchanged = 0
        };

        if (unlikely(order > MAX_CONTIG_ORDER))
                return -ENOMEM;

        set_xen_guest_handle(exchange.in.extent_start, in_frames);
        set_xen_guest_handle(exchange.out.extent_start, &out_frame);

        scrub_pages(vstart, num_gpfn);

        balloon_lock(flags);

        /* Get a new contiguous memory extent. */
        for (i = 0; i < num_gpfn; i++) {
                in_frames[i] = start_gpfn + i;
        }
        out_frame = start_gpfn;
        error = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
        success = (exchange.nr_exchanged == num_gpfn);
        BUG_ON(!success && ((exchange.nr_exchanged != 0) || (error == 0)));
        BUG_ON(success && (error != 0));
        if (unlikely(error == -ENOSYS)) {
                /* Compatibility when XENMEM_exchange is unsupported. */
                error = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
                                             &exchange.in);
                BUG_ON(error != num_gpfn);
                error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
                                             &exchange.out);
                if (error != 1) {
                        /* Couldn't get special memory: fall back to normal. */
                        for (i = 0; i < num_gpfn; i++) {
                                in_frames[i] = start_gpfn + i;
                        }
                        error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
                                                     &exchange.in);
                        BUG_ON(error != num_gpfn);
                        success = 0;
                } else
                        success = 1;
        }
        if (success)
                contiguous_bitmap_set(start_gpfn, num_gpfn);
#if 0
        if (success) {
                unsigned long mfn;
                unsigned long mfn_prev = ~0UL;
                for (i = 0; i < num_gpfn; i++) {
                        mfn = pfn_to_mfn_for_dma(start_gpfn + i);
                        if (mfn_prev != ~0UL && mfn != mfn_prev + 1) {
                                xprintk("\n");
                                xprintk("%s:%d order %d "
                                        "start 0x%lx bus 0x%lx "
                                        "machine 0x%lx\n",
                                        __func__, __LINE__, order,
                                        vstart, virt_to_bus((void*)vstart),
                                        phys_to_machine_for_dma(gphys));
                                xprintk("mfn: ");
                                for (i = 0; i < num_gpfn; i++) {
                                        mfn = pfn_to_mfn_for_dma(
                                                start_gpfn + i);
                                        xprintk("0x%lx ", mfn);
                                }
                                xprintk("\n");
                                break;
                        }
                        mfn_prev = mfn;
                }
        }
#endif
        balloon_unlock(flags);
        return success? 0: -ENOMEM;
}
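
/*
 * Editor's note (hedged usage sketch, not part of the original file): a
 * typical caller wants a machine-contiguous, address-limited buffer for DMA.
 * The buffer-allocation pattern and the 32-bit address restriction below are
 * assumptions for illustration only.
 */
#if 0   /* illustrative only */
static int contig_region_example(void)
{
        unsigned long vstart = __get_free_pages(GFP_KERNEL, 2); /* 4 pages */
        int error;

        if (!vstart)
                return -ENOMEM;
        /* Make the 4-page extent machine-contiguous, below 4GB. */
        error = __xen_create_contiguous_region(vstart, 2, 32);
        if (error) {
                free_pages(vstart, 2);
                return error;
        }
        /* ... use the buffer for DMA ... */
        __xen_destroy_contiguous_region(vstart, 2);
        free_pages(vstart, 2);
        return 0;
}
#endif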

void
__xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
{
        unsigned long flags;
        unsigned long error = 0;
        unsigned long start_gpfn = __pa(vstart) >> PAGE_SHIFT;
        unsigned long num_gpfn = 1UL << order;
        unsigned long i;

        unsigned long *out_frames = discontig_frames, in_frame;
        int success;
        struct xen_memory_exchange exchange = {
                .in = {
                        .nr_extents = 1,
                        .extent_order = order,
                        .domid = DOMID_SELF
                },
                .out = {
                        .nr_extents = num_gpfn,
                        .extent_order = 0,
                        .address_bits = 0,
                        .domid = DOMID_SELF
                },
                .nr_exchanged = 0
        };

        if (!test_bit(start_gpfn, contiguous_bitmap))
                return;

        if (unlikely(order > MAX_CONTIG_ORDER))
                return;

        set_xen_guest_handle(exchange.in.extent_start, &in_frame);
        set_xen_guest_handle(exchange.out.extent_start, out_frames);

        scrub_pages(vstart, num_gpfn);

        balloon_lock(flags);

        contiguous_bitmap_clear(start_gpfn, num_gpfn);

        /* Do the exchange for non-contiguous MFNs. */
        in_frame = start_gpfn;
        for (i = 0; i < num_gpfn; i++) {
                out_frames[i] = start_gpfn + i;
        }
        error = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
        success = (exchange.nr_exchanged == 1);
        BUG_ON(!success && ((exchange.nr_exchanged != 0) || (error == 0)));
        BUG_ON(success && (error != 0));
        if (unlikely(error == -ENOSYS)) {
                /* Compatibility when XENMEM_exchange is unsupported. */
                error = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
                                             &exchange.in);
                BUG_ON(error != 1);

                error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
                                             &exchange.out);
                BUG_ON(error != num_gpfn);
        }
        balloon_unlock(flags);
}

///////////////////////////////////////////////////////////////////////////
// grant table hack
// cmd: GNTTABOP_xxx

#include <linux/mm.h>
#include <xen/interface/xen.h>
#include <xen/gnttab.h>

static void
gnttab_map_grant_ref_pre(struct gnttab_map_grant_ref *uop)
{
        uint32_t flags;

        flags = uop->flags;

        if (flags & GNTMAP_host_map) {
                if (flags & GNTMAP_application_map) {
                        xprintd("GNTMAP_application_map is not supported yet: flags 0x%x\n", flags);
                        BUG();
                }
                if (flags & GNTMAP_contains_pte) {
                        xprintd("GNTMAP_contains_pte is not supported yet flags 0x%x\n", flags);
                        BUG();
                }
        } else if (flags & GNTMAP_device_map) {
                xprintd("GNTMAP_device_map is not supported yet 0x%x\n", flags);
                BUG();//XXX not yet. actually this flag is not used.
        } else {
                BUG();
        }
}

int
HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
{
        if (cmd == GNTTABOP_map_grant_ref) {
                unsigned int i;
                for (i = 0; i < count; i++) {
                        gnttab_map_grant_ref_pre(
                                (struct gnttab_map_grant_ref*)uop + i);
                }
        }
        return xencomm_mini_hypercall_grant_table_op(cmd, uop, count);
}
EXPORT_SYMBOL(HYPERVISOR_grant_table_op);
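
/*
 * Editor's note (hedged sketch, not part of the original file): the
 * pre-check above only accepts host mappings without GNTMAP_application_map
 * or GNTMAP_contains_pte, so a caller would issue the hypercall roughly as
 * below.  The gref/otherend values and the helper name are hypothetical.
 */
#if 0   /* illustrative only */
static int grant_map_example(unsigned long host_addr, grant_ref_t gref,
                             domid_t otherend)
{
        struct gnttab_map_grant_ref op;

        op.host_addr = host_addr;       /* where to map the granted page */
        op.flags     = GNTMAP_host_map; /* accepted by the pre-check above */
        op.ref       = gref;
        op.dom       = otherend;
        if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
                BUG();
        return op.status;               /* GNTST_okay on success */
}
#endif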

///////////////////////////////////////////////////////////////////////////
// PageForeign(), SetPageForeign(), ClearPageForeign()

struct address_space xen_ia64_foreign_dummy_mapping;
EXPORT_SYMBOL(xen_ia64_foreign_dummy_mapping);

///////////////////////////////////////////////////////////////////////////
// foreign mapping
#include <linux/efi.h>
#include <asm/meminit.h> // for IA64_GRANULE_SIZE, GRANULEROUND{UP,DOWN}()

static unsigned long privcmd_resource_min = 0;
// Xen/ia64 currently can handle pseudo physical address bits up to
// (PAGE_SHIFT * 3)
static unsigned long privcmd_resource_max = GRANULEROUNDDOWN((1UL << (PAGE_SHIFT * 3)) - 1);
static unsigned long privcmd_resource_align = IA64_GRANULE_SIZE;

static unsigned long
md_end_addr(const efi_memory_desc_t *md)
{
        return md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
}

#define XEN_IA64_PRIVCMD_LEAST_GAP_SIZE (1024 * 1024 * 1024UL)
static int
xen_ia64_privcmd_check_size(unsigned long start, unsigned long end)
{
        return (start < end &&
                (end - start) > XEN_IA64_PRIVCMD_LEAST_GAP_SIZE);
}

static int __init
xen_ia64_privcmd_init(void)
{
        void *efi_map_start, *efi_map_end, *p;
        u64 efi_desc_size;
        efi_memory_desc_t *md;
        unsigned long tmp_min;
        unsigned long tmp_max;
        unsigned long gap_size;
        unsigned long prev_end;

        if (!is_running_on_xen())
                return -1;

        efi_map_start = __va(ia64_boot_param->efi_memmap);
        efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
        efi_desc_size = ia64_boot_param->efi_memdesc_size;

        // First check the highest address already in use.
        for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
                // nothing
        }
        md = p - efi_desc_size;
        privcmd_resource_min = GRANULEROUNDUP(md_end_addr(md));
        if (xen_ia64_privcmd_check_size(privcmd_resource_min,
                                        privcmd_resource_max)) {
                goto out;
        }

        // The highest address in use is too large. Try to find the largest gap.
        tmp_min = privcmd_resource_max;
        tmp_max = 0;
        gap_size = 0;
        prev_end = 0;
        for (p = efi_map_start;
             p < efi_map_end - efi_desc_size;
             p += efi_desc_size) {
                unsigned long end;
                efi_memory_desc_t* next;
                unsigned long next_start;

                md = p;
                end = md_end_addr(md);
                if (end > privcmd_resource_max) {
                        break;
                }
                if (end < prev_end) {
                        // Workaround:
                        // Xen may pass incompletely sorted memory
                        // descriptors like
                        //   [x, x + length]
                        //   [x, x]
                        // whose order should be reversed.
                        continue;
                }
                next = p + efi_desc_size;
                next_start = next->phys_addr;
                if (next_start > privcmd_resource_max) {
                        next_start = privcmd_resource_max;
                }
                if (end < next_start && gap_size < (next_start - end)) {
                        tmp_min = end;
                        tmp_max = next_start;
                        gap_size = tmp_max - tmp_min;
                }
                prev_end = end;
        }

        privcmd_resource_min = GRANULEROUNDUP(tmp_min);
        if (xen_ia64_privcmd_check_size(privcmd_resource_min, tmp_max)) {
                privcmd_resource_max = tmp_max;
                goto out;
        }

        privcmd_resource_min = tmp_min;
        privcmd_resource_max = tmp_max;
        if (!xen_ia64_privcmd_check_size(privcmd_resource_min,
                                         privcmd_resource_max)) {
                // No large enough gap was found.
                // Go ahead anyway with a warning, hoping that no large
                // region will be requested.
                printk(KERN_WARNING "xen privcmd: no large enough region for privcmd mmap was found.\n");
        }

out:
        printk(KERN_INFO "xen privcmd uses pseudo physical addr range [0x%lx, 0x%lx] (%ldMB)\n",
               privcmd_resource_min, privcmd_resource_max,
               (privcmd_resource_max - privcmd_resource_min) >> 20);
        BUG_ON(privcmd_resource_min >= privcmd_resource_max);

        // XXX this should be somewhere appropriate
        (void)p2m_expose_init();

        return 0;
}
late_initcall(xen_ia64_privcmd_init);

struct xen_ia64_privcmd_entry {
        atomic_t map_count;
#define INVALID_GPFN (~0UL)
        unsigned long gpfn;
};

struct xen_ia64_privcmd_range {
        atomic_t ref_count;
        unsigned long pgoff; // in PAGE_SIZE
        struct resource* res;

        unsigned long num_entries;
        struct xen_ia64_privcmd_entry entries[0];
};

struct xen_ia64_privcmd_vma {
        struct xen_ia64_privcmd_range* range;

        unsigned long num_entries;
        struct xen_ia64_privcmd_entry* entries;
};
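
/*
 * Editor's note (descriptive comment added for clarity; the layout is
 * inferred from the code below, not stated in the original): one
 * xen_ia64_privcmd_range is allocated per privcmd mmap() and owns a
 * pseudo-physical resource window of num_entries pages, one
 * xen_ia64_privcmd_entry per page.  Each vma derived from that mmap (the
 * original one, or copies created via vm_ops->open) gets its own
 * xen_ia64_privcmd_vma that points at a sub-array of the range's entries;
 * the range is reference-counted and released when the last vma closes.
 */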

static void
xen_ia64_privcmd_init_entry(struct xen_ia64_privcmd_entry* entry)
{
        atomic_set(&entry->map_count, 0);
        entry->gpfn = INVALID_GPFN;
}

static int
xen_ia64_privcmd_entry_mmap(struct vm_area_struct* vma,
                            unsigned long addr,
                            struct xen_ia64_privcmd_range* privcmd_range,
                            int i,
                            unsigned long mfn,
                            pgprot_t prot,
                            domid_t domid)
{
        int error = 0;
        struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
        unsigned long gpfn;
        unsigned long flags;

        if ((addr & ~PAGE_MASK) != 0 || mfn == INVALID_MFN) {
                error = -EINVAL;
                goto out;
        }

        if (entry->gpfn != INVALID_GPFN) {
                error = -EBUSY;
                goto out;
        }
        gpfn = (privcmd_range->res->start >> PAGE_SHIFT) + i;

        flags = ASSIGN_writable;
        if (pgprot_val(prot) == PROT_READ) {
                flags = ASSIGN_readonly;
        }
        error = HYPERVISOR_add_physmap(gpfn, mfn, flags, domid);
        if (error != 0) {
                goto out;
        }

        prot = vma->vm_page_prot;
        error = remap_pfn_range(vma, addr, gpfn, 1 << PAGE_SHIFT, prot);
        if (error != 0) {
                error = HYPERVISOR_zap_physmap(gpfn, 0);
                if (error) {
                        BUG();//XXX
                }
        } else {
                atomic_inc(&entry->map_count);
                entry->gpfn = gpfn;
        }

out:
        return error;
}

static void
xen_ia64_privcmd_entry_munmap(struct xen_ia64_privcmd_range* privcmd_range,
                              int i)
{
        struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
        unsigned long gpfn = entry->gpfn;
        //gpfn = (privcmd_range->res->start >> PAGE_SHIFT) +
        //       (vma->vm_pgoff - privcmd_range->pgoff);
        int error;

        error = HYPERVISOR_zap_physmap(gpfn, 0);
        if (error) {
                BUG();//XXX
        }
        entry->gpfn = INVALID_GPFN;
}

static void
xen_ia64_privcmd_entry_open(struct xen_ia64_privcmd_range* privcmd_range,
                            int i)
{
        struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
        if (entry->gpfn != INVALID_GPFN) {
                atomic_inc(&entry->map_count);
        } else {
                BUG_ON(atomic_read(&entry->map_count) != 0);
        }
}

static void
xen_ia64_privcmd_entry_close(struct xen_ia64_privcmd_range* privcmd_range,
                             int i)
{
        struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
        if (entry->gpfn != INVALID_GPFN &&
            atomic_dec_and_test(&entry->map_count)) {
                xen_ia64_privcmd_entry_munmap(privcmd_range, i);
        }
}

static void xen_ia64_privcmd_vma_open(struct vm_area_struct* vma);
static void xen_ia64_privcmd_vma_close(struct vm_area_struct* vma);

struct vm_operations_struct xen_ia64_privcmd_vm_ops = {
        .open = &xen_ia64_privcmd_vma_open,
        .close = &xen_ia64_privcmd_vma_close,
};

static void
__xen_ia64_privcmd_vma_open(struct vm_area_struct* vma,
                            struct xen_ia64_privcmd_vma* privcmd_vma,
                            struct xen_ia64_privcmd_range* privcmd_range)
{
        unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
        unsigned long num_entries = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
        unsigned long i;

        BUG_ON(entry_offset < 0);
        BUG_ON(entry_offset + num_entries > privcmd_range->num_entries);

        privcmd_vma->range = privcmd_range;
        privcmd_vma->num_entries = num_entries;
        privcmd_vma->entries = &privcmd_range->entries[entry_offset];
        vma->vm_private_data = privcmd_vma;
        for (i = 0; i < privcmd_vma->num_entries; i++) {
                xen_ia64_privcmd_entry_open(privcmd_range, entry_offset + i);
        }

        vma->vm_private_data = privcmd_vma;
        vma->vm_ops = &xen_ia64_privcmd_vm_ops;
}

static void
xen_ia64_privcmd_vma_open(struct vm_area_struct* vma)
{
        struct xen_ia64_privcmd_vma* privcmd_vma = (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
        struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;

        atomic_inc(&privcmd_range->ref_count);
        // vm_op->open() can't fail.
        privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL | __GFP_NOFAIL);

        __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range);
}

static void
xen_ia64_privcmd_vma_close(struct vm_area_struct* vma)
{
        struct xen_ia64_privcmd_vma* privcmd_vma =
                (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
        struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
        unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
        unsigned long i;

        for (i = 0; i < privcmd_vma->num_entries; i++) {
                xen_ia64_privcmd_entry_close(privcmd_range, entry_offset + i);
        }
        vma->vm_private_data = NULL;
        kfree(privcmd_vma);

        if (atomic_dec_and_test(&privcmd_range->ref_count)) {
#if 1
                for (i = 0; i < privcmd_range->num_entries; i++) {
                        struct xen_ia64_privcmd_entry* entry =
                                &privcmd_range->entries[i];
                        BUG_ON(atomic_read(&entry->map_count) != 0);
                        BUG_ON(entry->gpfn != INVALID_GPFN);
                }
#endif
                release_resource(privcmd_range->res);
                kfree(privcmd_range->res);
                vfree(privcmd_range);
        }
}

int
privcmd_mmap(struct file * file, struct vm_area_struct * vma)
{
        int error;
        unsigned long size = vma->vm_end - vma->vm_start;
        unsigned long num_entries = size >> PAGE_SHIFT;
        struct xen_ia64_privcmd_range* privcmd_range = NULL;
        struct xen_ia64_privcmd_vma* privcmd_vma = NULL;
        struct resource* res = NULL;
        unsigned long i;
        BUG_ON(!is_running_on_xen());

        BUG_ON(file->private_data != NULL);

        error = -ENOMEM;
        privcmd_range =
                vmalloc(sizeof(*privcmd_range) +
                        sizeof(privcmd_range->entries[0]) * num_entries);
        if (privcmd_range == NULL) {
                goto out_enomem0;
        }
        privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL);
        if (privcmd_vma == NULL) {
                goto out_enomem1;
        }
        res = kzalloc(sizeof(*res), GFP_KERNEL);
        if (res == NULL) {
                goto out_enomem1;
        }
        res->name = "Xen privcmd mmap";
        error = allocate_resource(&iomem_resource, res, size,
                                  privcmd_resource_min, privcmd_resource_max,
                                  privcmd_resource_align, NULL, NULL);
        if (error) {
                goto out_enomem1;
        }
        privcmd_range->res = res;

        /* DONTCOPY is essential for Xen as copy_page_range is broken. */
        vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;

        atomic_set(&privcmd_range->ref_count, 1);
        privcmd_range->pgoff = vma->vm_pgoff;
        privcmd_range->num_entries = num_entries;
        for (i = 0; i < privcmd_range->num_entries; i++) {
                xen_ia64_privcmd_init_entry(&privcmd_range->entries[i]);
        }

        __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range);
        return 0;

out_enomem1:
        kfree(res);
        kfree(privcmd_vma);
out_enomem0:
        vfree(privcmd_range);
        return error;
}

int
direct_remap_pfn_range(struct vm_area_struct *vma,
                       unsigned long address,   // process virtual address
                       unsigned long mfn,       // mfn, mfn + 1, ... mfn + size/PAGE_SIZE
                       unsigned long size,
                       pgprot_t prot,
                       domid_t domid)           // target domain
{
        struct xen_ia64_privcmd_vma* privcmd_vma =
                (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
        struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
        unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;

        unsigned long i;
        unsigned long offset;
        int error = 0;
        BUG_ON(!is_running_on_xen());

#if 0
        if (prot != vm->vm_page_prot) {
                return -EINVAL;
        }
#endif

        i = (address - vma->vm_start) >> PAGE_SHIFT;
        for (offset = 0; offset < size; offset += PAGE_SIZE) {
                error = xen_ia64_privcmd_entry_mmap(vma,
                        (address + offset) & PAGE_MASK,
                        privcmd_range, entry_offset + i, mfn, prot, domid);
                if (error != 0) {
                        break;
                }

                i++;
                mfn++;
        }

        return error;
}
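
/*
 * Editor's note (hedged usage sketch, not part of the original file): the
 * expected caller is the common privcmd driver handling a foreign-mapping
 * ioctl, after privcmd_mmap() above has attached its private data to the
 * vma.  The helper name and values below are hypothetical.
 */
#if 0   /* illustrative only */
static int foreign_map_example(struct vm_area_struct *vma,
                               unsigned long mfn, domid_t domid)
{
        /* Map 16 foreign machine frames at the start of the vma. */
        return direct_remap_pfn_range(vma, vma->vm_start, mfn,
                                      16UL << PAGE_SHIFT,
                                      vma->vm_page_prot, domid);
}
#endif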

/* Called after suspend, to resume time. */
void
time_resume(void)
{
        extern void ia64_cpu_local_tick(void);

        /* Just trigger a tick. */
        ia64_cpu_local_tick();
}

///////////////////////////////////////////////////////////////////////////
// expose p2m table
#ifdef CONFIG_XEN_IA64_EXPOSE_P2M
#include <linux/cpu.h>
#include <asm/uaccess.h>

int p2m_initialized __read_mostly = 0;

unsigned long p2m_min_low_pfn __read_mostly;
unsigned long p2m_max_low_pfn __read_mostly;
unsigned long p2m_convert_min_pfn __read_mostly;
unsigned long p2m_convert_max_pfn __read_mostly;

static struct resource p2m_resource = {
        .name  = "Xen p2m table",
        .flags = IORESOURCE_MEM,
};
static unsigned long p2m_assign_start_pfn __read_mostly;
static unsigned long p2m_assign_end_pfn __read_mostly;
volatile const pte_t* p2m_pte __read_mostly;

#define GRANULE_PFN PTRS_PER_PTE
static unsigned long p2m_granule_pfn __read_mostly = GRANULE_PFN;

#define ROUNDDOWN(x, y) ((x) & ~((y) - 1))
#define ROUNDUP(x, y)   (((x) + (y) - 1) & ~((y) - 1))
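
/*
 * Editor's note (worked example, not part of the original code): both macros
 * assume y is a power of two.  E.g. with y == 0x4000 (a 16KB granule),
 * ROUNDDOWN(0x12345, 0x4000) == 0x10000 and ROUNDUP(0x12345, 0x4000) == 0x14000.
 */
#if 0   /* illustrative only */
static void round_macros_example(void)
{
        BUG_ON(ROUNDDOWN(0x12345UL, 0x4000UL) != 0x10000UL);
        BUG_ON(ROUNDUP(0x12345UL, 0x4000UL) != 0x14000UL);
}
#endif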

#define P2M_PREFIX "Xen p2m: "

static int xen_ia64_p2m_expose __read_mostly = 1;
module_param(xen_ia64_p2m_expose, int, 0);
MODULE_PARM_DESC(xen_ia64_p2m_expose,
                 "enable/disable xen/ia64 p2m exposure optimization\n");

#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
static int xen_ia64_p2m_expose_use_dtr __read_mostly = 1;
module_param(xen_ia64_p2m_expose_use_dtr, int, 0);
MODULE_PARM_DESC(xen_ia64_p2m_expose_use_dtr,
                 "use/unuse dtr to map exposed p2m table\n");

static const int p2m_page_shifts[] = {
        _PAGE_SIZE_4K,
        _PAGE_SIZE_8K,
        _PAGE_SIZE_16K,
        _PAGE_SIZE_64K,
        _PAGE_SIZE_256K,
        _PAGE_SIZE_1M,
        _PAGE_SIZE_4M,
        _PAGE_SIZE_16M,
        _PAGE_SIZE_64M,
        _PAGE_SIZE_256M,
};

struct p2m_itr_arg {
        unsigned long vaddr;
        unsigned long pteval;
        unsigned long log_page_size;
};
static struct p2m_itr_arg p2m_itr_arg __read_mostly;

// This should be in asm-ia64/kregs.h
#define IA64_TR_P2M_TABLE 3

static void
p2m_itr(void* info)
{
        struct p2m_itr_arg* arg = (struct p2m_itr_arg*)info;
        ia64_itr(0x2, IA64_TR_P2M_TABLE,
                 arg->vaddr, arg->pteval, arg->log_page_size);
        ia64_srlz_d();
}

static int
p2m_expose_dtr_call(struct notifier_block *self,
                    unsigned long event, void* ptr)
{
        unsigned int cpu = (unsigned int)(long)ptr;
        if (event != CPU_ONLINE)
                return 0;
        /* Insert the p2m translation on the newly onlined cpu only once
         * the table has actually been exposed via a dtr mapping. */
        if (p2m_initialized && xen_ia64_p2m_expose_use_dtr)
                smp_call_function_single(cpu, &p2m_itr, &p2m_itr_arg, 1, 1);
        return 0;
}

static struct notifier_block p2m_expose_dtr_hotplug_notifier = {
        .notifier_call = p2m_expose_dtr_call,
        .next          = NULL,
        .priority      = 0
};
#endif

static int
p2m_expose_init(void)
{
        unsigned long num_pfn;
        unsigned long size = 0;
        unsigned long p2m_size = 0;
        unsigned long align = ~0UL;
        int error = 0;
#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
        int i;
        unsigned long page_size;
        unsigned long log_page_size = 0;
#endif

        if (!xen_ia64_p2m_expose)
                return -ENOSYS;
        if (p2m_initialized)
                return 0;

#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
        error = register_cpu_notifier(&p2m_expose_dtr_hotplug_notifier);
        if (error < 0)
                return error;
#endif

        lock_cpu_hotplug();
        if (p2m_initialized)
                goto out;

#ifdef CONFIG_DISCONTIGMEM
        p2m_min_low_pfn = min_low_pfn;
        p2m_max_low_pfn = max_low_pfn;
#else
        p2m_min_low_pfn = 0;
        p2m_max_low_pfn = max_pfn;
#endif

#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
        if (xen_ia64_p2m_expose_use_dtr) {
                unsigned long granule_pfn = 0;
                p2m_size = p2m_max_low_pfn - p2m_min_low_pfn;
                for (i = 0;
                     i < sizeof(p2m_page_shifts)/sizeof(p2m_page_shifts[0]);
                     i++) {
                        log_page_size = p2m_page_shifts[i];
                        page_size = 1UL << log_page_size;
                        if (page_size < p2m_size)
                                continue;

                        granule_pfn = max(page_size >> PAGE_SHIFT,
                                          p2m_granule_pfn);
                        p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn,
                                                        granule_pfn);
                        p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn,
                                                      granule_pfn);
                        num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn;
                        size = num_pfn << PAGE_SHIFT;
                        p2m_size = num_pfn / PTRS_PER_PTE;
                        p2m_size = ROUNDUP(p2m_size, granule_pfn << PAGE_SHIFT);
                        if (p2m_size == page_size)
                                break;
                }
                if (p2m_size != page_size) {
                        printk(KERN_ERR "p2m_size != page_size\n");
                        error = -EINVAL;
                        goto out;
                }
                align = max(privcmd_resource_align, granule_pfn << PAGE_SHIFT);
        } else
#endif
        {
                BUG_ON(p2m_granule_pfn & (p2m_granule_pfn - 1));
                p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn,
                                                p2m_granule_pfn);
                p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn, p2m_granule_pfn);
                num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn;
                size = num_pfn << PAGE_SHIFT;
                p2m_size = num_pfn / PTRS_PER_PTE;
                p2m_size = ROUNDUP(p2m_size, p2m_granule_pfn << PAGE_SHIFT);
                align = max(privcmd_resource_align,
                            p2m_granule_pfn << PAGE_SHIFT);
        }

        // use privcmd region
        error = allocate_resource(&iomem_resource, &p2m_resource, p2m_size,
                                  privcmd_resource_min, privcmd_resource_max,
                                  align, NULL, NULL);
        if (error) {
                printk(KERN_ERR P2M_PREFIX
                       "can't allocate region for p2m exposure "
                       "[0x%016lx, 0x%016lx) 0x%016lx\n",
                       p2m_convert_min_pfn, p2m_convert_max_pfn, p2m_size);
                goto out;
        }

        p2m_assign_start_pfn = p2m_resource.start >> PAGE_SHIFT;
        p2m_assign_end_pfn = p2m_resource.end >> PAGE_SHIFT;

        error = HYPERVISOR_expose_p2m(p2m_convert_min_pfn,
                                      p2m_assign_start_pfn,
                                      size, p2m_granule_pfn);
        if (error) {
                printk(KERN_ERR P2M_PREFIX "failed expose p2m hypercall %d\n",
                       error);
                printk(KERN_ERR P2M_PREFIX "conv 0x%016lx assign 0x%016lx "
                       "size 0x%016lx granule 0x%016lx\n",
                       p2m_convert_min_pfn, p2m_assign_start_pfn,
                       size, p2m_granule_pfn);
                release_resource(&p2m_resource);
                goto out;
        }
        p2m_pte = (volatile const pte_t*)pfn_to_kaddr(p2m_assign_start_pfn);
#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
        if (xen_ia64_p2m_expose_use_dtr) {
                p2m_itr_arg.vaddr = (unsigned long)__va(p2m_assign_start_pfn
                                                        << PAGE_SHIFT);
                p2m_itr_arg.pteval = pte_val(pfn_pte(p2m_assign_start_pfn,
                                                     PAGE_KERNEL));
                p2m_itr_arg.log_page_size = log_page_size;
                smp_mb();
                smp_call_function(&p2m_itr, &p2m_itr_arg, 1, 1);
                p2m_itr(&p2m_itr_arg);
        }
#endif
        smp_mb();
        p2m_initialized = 1;
        printk(P2M_PREFIX "assign p2m table of [0x%016lx, 0x%016lx)\n",
               p2m_convert_min_pfn << PAGE_SHIFT,
               p2m_convert_max_pfn << PAGE_SHIFT);
        printk(P2M_PREFIX "to [0x%016lx, 0x%016lx) (%ld KBytes)\n",
               p2m_assign_start_pfn << PAGE_SHIFT,
               p2m_assign_end_pfn << PAGE_SHIFT,
               p2m_size / 1024);
out:
        unlock_cpu_hotplug();
        return error;
}

#ifdef notyet
void
p2m_expose_cleanup(void)
{
        BUG_ON(!p2m_initialized);
#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
        unregister_cpu_notifier(&p2m_expose_dtr_hotplug_notifier);
#endif
        release_resource(&p2m_resource);
}
#endif

//XXX inlinize?
unsigned long
p2m_phystomach(unsigned long gpfn)
{
        volatile const pte_t* pte;
        unsigned long mfn;
        unsigned long pteval;

        if (!p2m_initialized ||
            gpfn < p2m_min_low_pfn || gpfn > p2m_max_low_pfn
            /* || !pfn_valid(gpfn) */)
                return INVALID_MFN;
        pte = p2m_pte + (gpfn - p2m_convert_min_pfn);

        mfn = INVALID_MFN;
        if (likely(__get_user(pteval, (unsigned long __user *)pte) == 0 &&
                   pte_present(__pte(pteval)) &&
                   pte_pfn(__pte(pteval)) != (INVALID_MFN >> PAGE_SHIFT)))
                mfn = (pteval & _PFN_MASK) >> PAGE_SHIFT;

        return mfn;
}

EXPORT_SYMBOL_GPL(p2m_initialized);
EXPORT_SYMBOL_GPL(p2m_min_low_pfn);
EXPORT_SYMBOL_GPL(p2m_max_low_pfn);
EXPORT_SYMBOL_GPL(p2m_convert_min_pfn);
EXPORT_SYMBOL_GPL(p2m_convert_max_pfn);
EXPORT_SYMBOL_GPL(p2m_pte);
EXPORT_SYMBOL_GPL(p2m_phystomach);
#endif
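
/*
 * Editor's note (hedged usage sketch, not part of the original file): with
 * CONFIG_XEN_IA64_EXPOSE_P2M enabled, code that needs a guest-pfn to
 * machine-frame translation can consult the exposed table directly through
 * p2m_phystomach() instead of asking the hypervisor, as long as it handles
 * the INVALID_MFN case.  The helper name and variables are hypothetical.
 */
#if 0   /* illustrative only */
static int p2m_lookup_example(unsigned long gpfn, unsigned long *mfn)
{
        unsigned long m = p2m_phystomach(gpfn);

        if (m == INVALID_MFN)
                return -EINVAL;         /* not exposed or not present */
        *mfn = m;
        return 0;
}
#endif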