ia64/xen-unstable

view linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c @ 15143:e54da168363a

[IA64] Trivial warning fix

contiguous_bitmap_init has implicit declaration

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author Alex Williamson <alex.williamson@hp.com>
date Fri May 25 09:40:43 2007 -0600 (2007-05-25)
parents 3ecf1cea58b1
children b1b80a14d023
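Note: the warning referred to above is gcc's "implicit declaration of function" diagnostic: contiguous_bitmap_init (presumably the xen_contiguous_bitmap_init() defined in this file) was being called without a prototype in scope. As a rough, hypothetical sketch of how this class of warning is usually silenced, and not necessarily the exact change made by this changeset, the function would be declared in a header shared by the definition and its callers (for example include/asm-ia64/hypervisor.h):

    /* Hypothetical declaration in a shared header; the header name and the
     * CONFIG_XEN guard are illustrative, not taken from this changeset. */
    #ifdef CONFIG_XEN
    void xen_contiguous_bitmap_init(unsigned long end_pfn);
    #else
    #define xen_contiguous_bitmap_init(end_pfn) do { } while (0)
    #endif

With a declaration like this visible at the call site, the compiler no longer has to assume an implicit int-returning prototype for the call.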
line source
1 /******************************************************************************
2 * arch/ia64/xen/hypervisor.c
3 *
4 * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
5 * VA Linux Systems Japan K.K.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 */
23 //#include <linux/kernel.h>
24 #include <linux/spinlock.h>
25 #include <linux/bootmem.h>
26 #include <linux/module.h>
27 #include <linux/vmalloc.h>
28 #include <linux/efi.h>
29 #include <asm/page.h>
30 #include <asm/pgalloc.h>
31 #include <asm/meminit.h>
32 #include <asm/hypervisor.h>
33 #include <asm/hypercall.h>
34 #include <xen/interface/memory.h>
35 #include <xen/xencons.h>
36 #include <xen/balloon.h>
38 shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)XSI_BASE;
39 EXPORT_SYMBOL(HYPERVISOR_shared_info);
41 start_info_t *xen_start_info;
42 EXPORT_SYMBOL(xen_start_info);
44 int running_on_xen;
45 EXPORT_SYMBOL(running_on_xen);
47 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M
48 static int p2m_expose_init(void);
49 #else
50 #define p2m_expose_init() (-ENOSYS)
51 #define p2m_expose_resume() ((void)0)
52 #endif
54 EXPORT_SYMBOL(__hypercall);
56 void __init
57 xen_setup(char **cmdline_p)
58 {
59 extern void dig_setup(char **cmdline_p);
60 if (ia64_platform_is("xen"))
61 dig_setup(cmdline_p);
63 if (!is_running_on_xen() || !is_initial_xendomain())
64 return;
66 if (xen_start_info->console.dom0.info_size >=
67 sizeof(struct dom0_vga_console_info)) {
68 const struct dom0_vga_console_info *info =
69 (struct dom0_vga_console_info *)(
70 (char *)xen_start_info +
71 xen_start_info->console.dom0.info_off);
72 dom0_init_screen_info(info);
73 }
74 xen_start_info->console.domU.mfn = 0;
75 xen_start_info->console.domU.evtchn = 0;
76 }
78 void __cpuinit
79 xen_cpu_init(void)
80 {
81 extern void xen_smp_intr_init(void);
82 xen_smp_intr_init();
83 }
85 //XXX same as i386, x86_64 contiguous_bitmap_set(), contiguous_bitmap_clear()
86 // move those to lib/contiguous_bitmap?
87 //XXX discontigmem/sparsemem
89 /*
90 * Bitmap is indexed by page number. If bit is set, the page is part of a
91 * xen_create_contiguous_region() area of memory.
92 */
93 unsigned long *contiguous_bitmap;
95 #ifdef CONFIG_VIRTUAL_MEM_MAP
96 /* Following logic is stolen from create_mem_map_table() for virtual memmap */
97 static int
98 create_contiguous_bitmap(u64 start, u64 end, void *arg)
99 {
100 unsigned long address, start_page, end_page;
101 unsigned long bitmap_start, bitmap_end;
102 unsigned char *bitmap;
103 int node;
104 pgd_t *pgd;
105 pud_t *pud;
106 pmd_t *pmd;
107 pte_t *pte;
109 bitmap_start = (unsigned long)contiguous_bitmap +
110 ((__pa(start) >> PAGE_SHIFT) >> 3);
111 bitmap_end = (unsigned long)contiguous_bitmap +
112 (((__pa(end) >> PAGE_SHIFT) + 2 * BITS_PER_LONG) >> 3);
114 start_page = bitmap_start & PAGE_MASK;
115 end_page = PAGE_ALIGN(bitmap_end);
116 node = paddr_to_nid(__pa(start));
118 bitmap = alloc_bootmem_pages_node(NODE_DATA(node),
119 end_page - start_page);
120 BUG_ON(!bitmap);
121 memset(bitmap, 0, end_page - start_page);
123 for (address = start_page; address < end_page; address += PAGE_SIZE) {
124 pgd = pgd_offset_k(address);
125 if (pgd_none(*pgd))
126 pgd_populate(&init_mm, pgd,
127 alloc_bootmem_pages_node(NODE_DATA(node),
128 PAGE_SIZE));
129 pud = pud_offset(pgd, address);
131 if (pud_none(*pud))
132 pud_populate(&init_mm, pud,
133 alloc_bootmem_pages_node(NODE_DATA(node),
134 PAGE_SIZE));
135 pmd = pmd_offset(pud, address);
137 if (pmd_none(*pmd))
138 pmd_populate_kernel(&init_mm, pmd,
139 alloc_bootmem_pages_node
140 (NODE_DATA(node), PAGE_SIZE));
141 pte = pte_offset_kernel(pmd, address);
143 if (pte_none(*pte))
144 set_pte(pte,
145 pfn_pte(__pa(bitmap + (address - start_page))
146 >> PAGE_SHIFT, PAGE_KERNEL));
147 }
148 return 0;
149 }
150 #endif
152 static void
153 __contiguous_bitmap_init(unsigned long size)
154 {
155 contiguous_bitmap = alloc_bootmem_pages(size);
156 BUG_ON(!contiguous_bitmap);
157 memset(contiguous_bitmap, 0, size);
158 }
160 void
161 xen_contiguous_bitmap_init(unsigned long end_pfn)
162 {
163 unsigned long size = (end_pfn + 2 * BITS_PER_LONG) >> 3;
164 #ifndef CONFIG_VIRTUAL_MEM_MAP
165 __contiguous_bitmap_init(size);
166 #else
167 unsigned long max_gap = 0;
169 efi_memmap_walk(find_largest_hole, (u64*)&max_gap);
170 if (max_gap < LARGE_GAP) {
171 __contiguous_bitmap_init(size);
172 } else {
173 unsigned long map_size = PAGE_ALIGN(size);
174 vmalloc_end -= map_size;
175 contiguous_bitmap = (unsigned long*)vmalloc_end;
176 efi_memmap_walk(create_contiguous_bitmap, NULL);
177 }
178 #endif
179 }
181 #if 0
182 int
183 contiguous_bitmap_test(void* p)
184 {
185 return test_bit(__pa(p) >> PAGE_SHIFT, contiguous_bitmap);
186 }
187 #endif
189 static void contiguous_bitmap_set(
190 unsigned long first_page, unsigned long nr_pages)
191 {
192 unsigned long start_off, end_off, curr_idx, end_idx;
194 curr_idx = first_page / BITS_PER_LONG;
195 start_off = first_page & (BITS_PER_LONG-1);
196 end_idx = (first_page + nr_pages) / BITS_PER_LONG;
197 end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);
199 if (curr_idx == end_idx) {
200 contiguous_bitmap[curr_idx] |=
201 ((1UL<<end_off)-1) & -(1UL<<start_off);
202 } else {
203 contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
204 while ( ++curr_idx < end_idx )
205 contiguous_bitmap[curr_idx] = ~0UL;
206 contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
207 }
208 }
210 static void contiguous_bitmap_clear(
211 unsigned long first_page, unsigned long nr_pages)
212 {
213 unsigned long start_off, end_off, curr_idx, end_idx;
215 curr_idx = first_page / BITS_PER_LONG;
216 start_off = first_page & (BITS_PER_LONG-1);
217 end_idx = (first_page + nr_pages) / BITS_PER_LONG;
218 end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);
220 if (curr_idx == end_idx) {
221 contiguous_bitmap[curr_idx] &=
222 -(1UL<<end_off) | ((1UL<<start_off)-1);
223 } else {
224 contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
225 while ( ++curr_idx != end_idx )
226 contiguous_bitmap[curr_idx] = 0;
227 contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
228 }
229 }
231 // __xen_create_contiguous_region(), __xen_destroy_contiguous_region()
232 // are based on i386 xen_create_contiguous_region(),
233 // xen_destroy_contiguous_region()
235 /* Protected by balloon_lock. */
236 #define MAX_CONTIG_ORDER 7
237 static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
239 /* Ensure multi-page extents are contiguous in machine memory. */
240 int
241 __xen_create_contiguous_region(unsigned long vstart,
242 unsigned int order, unsigned int address_bits)
243 {
244 unsigned long error = 0;
245 unsigned long gphys = __pa(vstart);
246 unsigned long start_gpfn = gphys >> PAGE_SHIFT;
247 unsigned long num_gpfn = 1 << order;
248 unsigned long i;
249 unsigned long flags;
251 unsigned long *in_frames = discontig_frames, out_frame;
252 int success;
253 struct xen_memory_exchange exchange = {
254 .in = {
255 .nr_extents = num_gpfn,
256 .extent_order = 0,
257 .domid = DOMID_SELF
258 },
259 .out = {
260 .nr_extents = 1,
261 .extent_order = order,
262 .address_bits = address_bits,
263 .domid = DOMID_SELF
264 },
265 .nr_exchanged = 0
266 };
268 if (unlikely(order > MAX_CONTIG_ORDER))
269 return -ENOMEM;
271 set_xen_guest_handle(exchange.in.extent_start, in_frames);
272 set_xen_guest_handle(exchange.out.extent_start, &out_frame);
274 scrub_pages(vstart, num_gpfn);
276 balloon_lock(flags);
278 /* Get a new contiguous memory extent. */
279 for (i = 0; i < num_gpfn; i++) {
280 in_frames[i] = start_gpfn + i;
281 }
282 out_frame = start_gpfn;
283 error = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
284 success = (exchange.nr_exchanged == num_gpfn);
285 BUG_ON(!success && ((exchange.nr_exchanged != 0) || (error == 0)));
286 BUG_ON(success && (error != 0));
287 if (unlikely(error == -ENOSYS)) {
288 /* Compatibility when XENMEM_exchange is unsupported. */
289 error = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
290 &exchange.in);
291 BUG_ON(error != num_gpfn);
292 error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
293 &exchange.out);
294 if (error != 1) {
295 /* Couldn't get special memory: fall back to normal. */
296 for (i = 0; i < num_gpfn; i++) {
297 in_frames[i] = start_gpfn + i;
298 }
299 error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
300 &exchange.in);
301 BUG_ON(error != num_gpfn);
302 success = 0;
303 } else
304 success = 1;
305 }
306 if (success)
307 contiguous_bitmap_set(start_gpfn, num_gpfn);
308 #if 0
309 if (success) {
310 unsigned long mfn;
311 unsigned long mfn_prev = ~0UL;
312 for (i = 0; i < num_gpfn; i++) {
313 mfn = pfn_to_mfn_for_dma(start_gpfn + i);
314 if (mfn_prev != ~0UL && mfn != mfn_prev + 1) {
315 xprintk("\n");
316 xprintk("%s:%d order %d "
317 "start 0x%lx bus 0x%lx "
318 "machine 0x%lx\n",
319 __func__, __LINE__, order,
320 vstart, virt_to_bus((void*)vstart),
321 phys_to_machine_for_dma(gphys));
322 xprintk("mfn: ");
323 for (i = 0; i < num_gpfn; i++) {
324 mfn = pfn_to_mfn_for_dma(
325 start_gpfn + i);
326 xprintk("0x%lx ", mfn);
327 }
328 xprintk("\n");
329 break;
330 }
331 mfn_prev = mfn;
332 }
333 }
334 #endif
335 balloon_unlock(flags);
336 return success? 0: -ENOMEM;
337 }
339 void
340 __xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
341 {
342 unsigned long flags;
343 unsigned long error = 0;
344 unsigned long start_gpfn = __pa(vstart) >> PAGE_SHIFT;
345 unsigned long num_gpfn = 1UL << order;
346 unsigned long i;
348 unsigned long *out_frames = discontig_frames, in_frame;
349 int success;
350 struct xen_memory_exchange exchange = {
351 .in = {
352 .nr_extents = 1,
353 .extent_order = order,
354 .domid = DOMID_SELF
355 },
356 .out = {
357 .nr_extents = num_gpfn,
358 .extent_order = 0,
359 .address_bits = 0,
360 .domid = DOMID_SELF
361 },
362 .nr_exchanged = 0
363 };
366 if (!test_bit(start_gpfn, contiguous_bitmap))
367 return;
369 if (unlikely(order > MAX_CONTIG_ORDER))
370 return;
372 set_xen_guest_handle(exchange.in.extent_start, &in_frame);
373 set_xen_guest_handle(exchange.out.extent_start, out_frames);
375 scrub_pages(vstart, num_gpfn);
377 balloon_lock(flags);
379 contiguous_bitmap_clear(start_gpfn, num_gpfn);
381 /* Do the exchange for non-contiguous MFNs. */
382 in_frame = start_gpfn;
383 for (i = 0; i < num_gpfn; i++) {
384 out_frames[i] = start_gpfn + i;
385 }
386 error = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
387 success = (exchange.nr_exchanged == 1);
388 BUG_ON(!success && ((exchange.nr_exchanged != 0) || (error == 0)));
389 BUG_ON(success && (error != 0));
390 if (unlikely(error == -ENOSYS)) {
391 /* Compatibility when XENMEM_exchange is unsupported. */
392 error = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
393 &exchange.in);
394 BUG_ON(error != 1);
396 error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
397 &exchange.out);
398 BUG_ON(error != num_gpfn);
399 }
400 balloon_unlock(flags);
401 }
404 ///////////////////////////////////////////////////////////////////////////
405 // grant table hack
406 // cmd: GNTTABOP_xxx
408 #include <linux/mm.h>
409 #include <xen/interface/xen.h>
410 #include <xen/gnttab.h>
412 static void
413 gnttab_map_grant_ref_pre(struct gnttab_map_grant_ref *uop)
414 {
415 uint32_t flags;
417 flags = uop->flags;
419 if (flags & GNTMAP_host_map) {
420 if (flags & GNTMAP_application_map) {
421 xprintd("GNTMAP_application_map is not supported yet: flags 0x%x\n", flags);
422 BUG();
423 }
424 if (flags & GNTMAP_contains_pte) {
425 xprintd("GNTMAP_contains_pte is not supported yet flags 0x%x\n", flags);
426 BUG();
427 }
428 } else if (flags & GNTMAP_device_map) {
429 xprintd("GNTMAP_device_map is not supported yet 0x%x\n", flags);
430 BUG();//XXX not yet. actually this flag is not used.
431 } else {
432 BUG();
433 }
434 }
436 int
437 HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
438 {
439 if (cmd == GNTTABOP_map_grant_ref) {
440 unsigned int i;
441 for (i = 0; i < count; i++) {
442 gnttab_map_grant_ref_pre(
443 (struct gnttab_map_grant_ref*)uop + i);
444 }
445 }
446 return xencomm_mini_hypercall_grant_table_op(cmd, uop, count);
447 }
448 EXPORT_SYMBOL(HYPERVISOR_grant_table_op);
450 ///////////////////////////////////////////////////////////////////////////
451 // foreign mapping
452 #include <linux/efi.h>
453 #include <asm/meminit.h> // for IA64_GRANULE_SIZE, GRANULEROUND{UP,DOWN}()
455 static unsigned long privcmd_resource_min = 0;
456 // Xen/ia64 currently can handle pseudo physical address bits up to
457 // (PAGE_SHIFT * 3)
458 static unsigned long privcmd_resource_max = GRANULEROUNDDOWN((1UL << (PAGE_SHIFT * 3)) - 1);
459 static unsigned long privcmd_resource_align = IA64_GRANULE_SIZE;
461 static unsigned long
462 md_end_addr(const efi_memory_desc_t *md)
463 {
464 return md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
465 }
467 #define XEN_IA64_PRIVCMD_LEAST_GAP_SIZE (1024 * 1024 * 1024UL)
468 static int
469 xen_ia64_privcmd_check_size(unsigned long start, unsigned long end)
470 {
471 return (start < end &&
472 (end - start) > XEN_IA64_PRIVCMD_LEAST_GAP_SIZE);
473 }
475 static int __init
476 xen_ia64_privcmd_init(void)
477 {
478 void *efi_map_start, *efi_map_end, *p;
479 u64 efi_desc_size;
480 efi_memory_desc_t *md;
481 unsigned long tmp_min;
482 unsigned long tmp_max;
483 unsigned long gap_size;
484 unsigned long prev_end;
486 if (!is_running_on_xen())
487 return -1;
489 efi_map_start = __va(ia64_boot_param->efi_memmap);
490 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
491 efi_desc_size = ia64_boot_param->efi_memdesc_size;
493 // First, check the highest address already in use.
494 for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
495 // nothing
496 }
497 md = p - efi_desc_size;
498 privcmd_resource_min = GRANULEROUNDUP(md_end_addr(md));
499 if (xen_ia64_privcmd_check_size(privcmd_resource_min,
500 privcmd_resource_max)) {
501 goto out;
502 }
504 // The highest used address is too large; try to find the largest gap.
505 tmp_min = privcmd_resource_max;
506 tmp_max = 0;
507 gap_size = 0;
508 prev_end = 0;
509 for (p = efi_map_start;
510 p < efi_map_end - efi_desc_size;
511 p += efi_desc_size) {
512 unsigned long end;
513 efi_memory_desc_t* next;
514 unsigned long next_start;
516 md = p;
517 end = md_end_addr(md);
518 if (end > privcmd_resource_max) {
519 break;
520 }
521 if (end < prev_end) {
522 // Work around:
523 // Xen may pass incompletely sorted memory
524 // descriptors like
525 // [x, x + length]
526 // [x, x]
527 // where the order should be the reverse.
528 continue;
529 }
530 next = p + efi_desc_size;
531 next_start = next->phys_addr;
532 if (next_start > privcmd_resource_max) {
533 next_start = privcmd_resource_max;
534 }
535 if (end < next_start && gap_size < (next_start - end)) {
536 tmp_min = end;
537 tmp_max = next_start;
538 gap_size = tmp_max - tmp_min;
539 }
540 prev_end = end;
541 }
543 privcmd_resource_min = GRANULEROUNDUP(tmp_min);
544 if (xen_ia64_privcmd_check_size(privcmd_resource_min, tmp_max)) {
545 privcmd_resource_max = tmp_max;
546 goto out;
547 }
549 privcmd_resource_min = tmp_min;
550 privcmd_resource_max = tmp_max;
551 if (!xen_ia64_privcmd_check_size(privcmd_resource_min,
552 privcmd_resource_max)) {
553 // No large enough gap was found.
554 // Go ahead anyway with a warning, hoping that no large
555 // region will be requested.
556 printk(KERN_WARNING "xen privcmd: large enough region for privcmd mmap is not found.\n");
557 }
559 out:
560 printk(KERN_INFO "xen privcmd uses pseudo physical addr range [0x%lx, 0x%lx] (%ldMB)\n",
561 privcmd_resource_min, privcmd_resource_max,
562 (privcmd_resource_max - privcmd_resource_min) >> 20);
563 BUG_ON(privcmd_resource_min >= privcmd_resource_max);
565 // XXX this should be somewhere appropriate
566 (void)p2m_expose_init();
568 return 0;
569 }
570 late_initcall(xen_ia64_privcmd_init);
572 struct xen_ia64_privcmd_entry {
573 atomic_t map_count;
574 #define INVALID_GPFN (~0UL)
575 unsigned long gpfn;
576 };
578 struct xen_ia64_privcmd_range {
579 atomic_t ref_count;
580 unsigned long pgoff; // in PAGE_SIZE
581 struct resource* res;
583 unsigned long num_entries;
584 struct xen_ia64_privcmd_entry entries[0];
585 };
587 struct xen_ia64_privcmd_vma {
588 int is_privcmd_mmapped;
589 struct xen_ia64_privcmd_range* range;
591 unsigned long num_entries;
592 struct xen_ia64_privcmd_entry* entries;
593 };
595 static void
596 xen_ia64_privcmd_init_entry(struct xen_ia64_privcmd_entry* entry)
597 {
598 atomic_set(&entry->map_count, 0);
599 entry->gpfn = INVALID_GPFN;
600 }
602 static int
603 xen_ia64_privcmd_entry_mmap(struct vm_area_struct* vma,
604 unsigned long addr,
605 struct xen_ia64_privcmd_range* privcmd_range,
606 int i,
607 unsigned long gmfn,
608 pgprot_t prot,
609 domid_t domid)
610 {
611 int error = 0;
612 struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
613 unsigned long gpfn;
614 unsigned long flags;
616 if ((addr & ~PAGE_MASK) != 0 || gmfn == INVALID_MFN) {
617 error = -EINVAL;
618 goto out;
619 }
621 if (entry->gpfn != INVALID_GPFN) {
622 error = -EBUSY;
623 goto out;
624 }
625 gpfn = (privcmd_range->res->start >> PAGE_SHIFT) + i;
627 flags = ASSIGN_writable;
628 if (pgprot_val(prot) == PROT_READ) {
629 flags = ASSIGN_readonly;
630 }
631 error = HYPERVISOR_add_physmap_with_gmfn(gpfn, gmfn, flags, domid);
632 if (error != 0) {
633 goto out;
634 }
636 prot = vma->vm_page_prot;
637 error = remap_pfn_range(vma, addr, gpfn, 1 << PAGE_SHIFT, prot);
638 if (error != 0) {
639 error = HYPERVISOR_zap_physmap(gpfn, 0);
640 if (error) {
641 BUG();//XXX
642 }
643 } else {
644 atomic_inc(&entry->map_count);
645 entry->gpfn = gpfn;
646 }
648 out:
649 return error;
650 }
652 static void
653 xen_ia64_privcmd_entry_munmap(struct xen_ia64_privcmd_range* privcmd_range,
654 int i)
655 {
656 struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
657 unsigned long gpfn = entry->gpfn;
658 //gpfn = (privcmd_range->res->start >> PAGE_SHIFT) +
659 // (vma->vm_pgoff - privcmd_range->pgoff);
660 int error;
662 error = HYPERVISOR_zap_physmap(gpfn, 0);
663 if (error) {
664 BUG();//XXX
665 }
666 entry->gpfn = INVALID_GPFN;
667 }
669 static void
670 xen_ia64_privcmd_entry_open(struct xen_ia64_privcmd_range* privcmd_range,
671 int i)
672 {
673 struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
674 if (entry->gpfn != INVALID_GPFN) {
675 atomic_inc(&entry->map_count);
676 } else {
677 BUG_ON(atomic_read(&entry->map_count) != 0);
678 }
679 }
681 static void
682 xen_ia64_privcmd_entry_close(struct xen_ia64_privcmd_range* privcmd_range,
683 int i)
684 {
685 struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
686 if (entry->gpfn != INVALID_GPFN &&
687 atomic_dec_and_test(&entry->map_count)) {
688 xen_ia64_privcmd_entry_munmap(privcmd_range, i);
689 }
690 }
692 static void xen_ia64_privcmd_vma_open(struct vm_area_struct* vma);
693 static void xen_ia64_privcmd_vma_close(struct vm_area_struct* vma);
695 struct vm_operations_struct xen_ia64_privcmd_vm_ops = {
696 .open = &xen_ia64_privcmd_vma_open,
697 .close = &xen_ia64_privcmd_vma_close,
698 };
700 static void
701 __xen_ia64_privcmd_vma_open(struct vm_area_struct* vma,
702 struct xen_ia64_privcmd_vma* privcmd_vma,
703 struct xen_ia64_privcmd_range* privcmd_range)
704 {
705 unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
706 unsigned long num_entries = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
707 unsigned long i;
709 BUG_ON(entry_offset < 0);
710 BUG_ON(entry_offset + num_entries > privcmd_range->num_entries);
712 privcmd_vma->range = privcmd_range;
713 privcmd_vma->num_entries = num_entries;
714 privcmd_vma->entries = &privcmd_range->entries[entry_offset];
715 vma->vm_private_data = privcmd_vma;
716 for (i = 0; i < privcmd_vma->num_entries; i++) {
717 xen_ia64_privcmd_entry_open(privcmd_range, entry_offset + i);
718 }
720 vma->vm_private_data = privcmd_vma;
721 vma->vm_ops = &xen_ia64_privcmd_vm_ops;
722 }
724 static void
725 xen_ia64_privcmd_vma_open(struct vm_area_struct* vma)
726 {
727 struct xen_ia64_privcmd_vma* old_privcmd_vma = (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
728 struct xen_ia64_privcmd_vma* privcmd_vma = (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
729 struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
731 atomic_inc(&privcmd_range->ref_count);
732 // vm_op->open() can't fail.
733 privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL | __GFP_NOFAIL);
734 // copy original value if necessary
735 privcmd_vma->is_privcmd_mmapped = old_privcmd_vma->is_privcmd_mmapped;
737 __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range);
738 }
740 static void
741 xen_ia64_privcmd_vma_close(struct vm_area_struct* vma)
742 {
743 struct xen_ia64_privcmd_vma* privcmd_vma =
744 (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
745 struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
746 unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
747 unsigned long i;
749 for (i = 0; i < privcmd_vma->num_entries; i++) {
750 xen_ia64_privcmd_entry_close(privcmd_range, entry_offset + i);
751 }
752 vma->vm_private_data = NULL;
753 kfree(privcmd_vma);
755 if (atomic_dec_and_test(&privcmd_range->ref_count)) {
756 #if 1
757 for (i = 0; i < privcmd_range->num_entries; i++) {
758 struct xen_ia64_privcmd_entry* entry =
759 &privcmd_range->entries[i];
760 BUG_ON(atomic_read(&entry->map_count) != 0);
761 BUG_ON(entry->gpfn != INVALID_GPFN);
762 }
763 #endif
764 release_resource(privcmd_range->res);
765 kfree(privcmd_range->res);
766 vfree(privcmd_range);
767 }
768 }
770 int
771 privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
772 {
773 struct xen_ia64_privcmd_vma* privcmd_vma =
774 (struct xen_ia64_privcmd_vma *)vma->vm_private_data;
775 return (xchg(&privcmd_vma->is_privcmd_mmapped, 1) == 0);
776 }
778 int
779 privcmd_mmap(struct file * file, struct vm_area_struct * vma)
780 {
781 int error;
782 unsigned long size = vma->vm_end - vma->vm_start;
783 unsigned long num_entries = size >> PAGE_SHIFT;
784 struct xen_ia64_privcmd_range* privcmd_range = NULL;
785 struct xen_ia64_privcmd_vma* privcmd_vma = NULL;
786 struct resource* res = NULL;
787 unsigned long i;
788 BUG_ON(!is_running_on_xen());
790 BUG_ON(file->private_data != NULL);
792 error = -ENOMEM;
793 privcmd_range =
794 vmalloc(sizeof(*privcmd_range) +
795 sizeof(privcmd_range->entries[0]) * num_entries);
796 if (privcmd_range == NULL) {
797 goto out_enomem0;
798 }
799 privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL);
800 if (privcmd_vma == NULL) {
801 goto out_enomem1;
802 }
803 privcmd_vma->is_privcmd_mmapped = 0;
805 res = kzalloc(sizeof(*res), GFP_KERNEL);
806 if (res == NULL) {
807 goto out_enomem1;
808 }
809 res->name = "Xen privcmd mmap";
810 error = allocate_resource(&iomem_resource, res, size,
811 privcmd_resource_min, privcmd_resource_max,
812 privcmd_resource_align, NULL, NULL);
813 if (error) {
814 goto out_enomem1;
815 }
816 privcmd_range->res = res;
818 /* DONTCOPY is essential for Xen as copy_page_range is broken. */
819 vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
821 atomic_set(&privcmd_range->ref_count, 1);
822 privcmd_range->pgoff = vma->vm_pgoff;
823 privcmd_range->num_entries = num_entries;
824 for (i = 0; i < privcmd_range->num_entries; i++) {
825 xen_ia64_privcmd_init_entry(&privcmd_range->entries[i]);
826 }
828 __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range);
829 return 0;
831 out_enomem1:
832 kfree(res);
833 kfree(privcmd_vma);
834 out_enomem0:
835 vfree(privcmd_range);
836 return error;
837 }
839 int
840 direct_remap_pfn_range(struct vm_area_struct *vma,
841 unsigned long address, // process virtual address
842 unsigned long gmfn, // gmfn, gmfn + 1, ... gmfn + size/PAGE_SIZE
843 unsigned long size,
844 pgprot_t prot,
845 domid_t domid) // target domain
846 {
847 struct xen_ia64_privcmd_vma* privcmd_vma =
848 (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
849 struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
850 unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
852 unsigned long i;
853 unsigned long offset;
854 int error = 0;
855 BUG_ON(!is_running_on_xen());
857 #if 0
858 if (prot != vm->vm_page_prot) {
859 return -EINVAL;
860 }
861 #endif
863 i = (address - vma->vm_start) >> PAGE_SHIFT;
864 for (offset = 0; offset < size; offset += PAGE_SIZE) {
865 error = xen_ia64_privcmd_entry_mmap(vma, (address + offset) & PAGE_MASK, privcmd_range, entry_offset + i, gmfn, prot, domid);
866 if (error != 0) {
867 break;
868 }
870 i++;
871 gmfn++;
872 }
874 return error;
875 }
878 ///////////////////////////////////////////////////////////////////////////
879 // expose p2m table
880 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M
881 #include <linux/cpu.h>
882 #include <asm/uaccess.h>
884 int p2m_initialized __read_mostly = 0;
886 unsigned long p2m_min_low_pfn __read_mostly;
887 unsigned long p2m_max_low_pfn __read_mostly;
888 unsigned long p2m_convert_min_pfn __read_mostly;
889 unsigned long p2m_convert_max_pfn __read_mostly;
891 static struct resource p2m_resource = {
892 .name = "Xen p2m table",
893 .flags = IORESOURCE_MEM,
894 };
895 static unsigned long p2m_assign_start_pfn __read_mostly;
896 static unsigned long p2m_assign_end_pfn __read_mostly;
897 static unsigned long p2m_expose_size; // this is referenced only at resume time,
898 // so __read_mostly doesn't make sense.
899 volatile const pte_t* p2m_pte __read_mostly;
901 #define GRANULE_PFN PTRS_PER_PTE
902 static unsigned long p2m_granule_pfn __read_mostly = GRANULE_PFN;
904 #define ROUNDDOWN(x, y) ((x) & ~((y) - 1))
905 #define ROUNDUP(x, y) (((x) + (y) - 1) & ~((y) - 1))
907 #define P2M_PREFIX "Xen p2m: "
909 static int xen_ia64_p2m_expose __read_mostly = 1;
910 module_param(xen_ia64_p2m_expose, int, 0);
911 MODULE_PARM_DESC(xen_ia64_p2m_expose,
912 "enable/disable xen/ia64 p2m exposure optimization\n");
914 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
915 static int xen_ia64_p2m_expose_use_dtr __read_mostly = 1;
916 module_param(xen_ia64_p2m_expose_use_dtr, int, 0);
917 MODULE_PARM_DESC(xen_ia64_p2m_expose_use_dtr,
918 "use/unuse dtr to map exposed p2m table\n");
920 static const int p2m_page_shifts[] = {
921 _PAGE_SIZE_4K,
922 _PAGE_SIZE_8K,
923 _PAGE_SIZE_16K,
924 _PAGE_SIZE_64K,
925 _PAGE_SIZE_256K,
926 _PAGE_SIZE_1M,
927 _PAGE_SIZE_4M,
928 _PAGE_SIZE_16M,
929 _PAGE_SIZE_64M,
930 _PAGE_SIZE_256M,
931 };
933 struct p2m_itr_arg {
934 unsigned long vaddr;
935 unsigned long pteval;
936 unsigned long log_page_size;
937 };
938 static struct p2m_itr_arg p2m_itr_arg __read_mostly;
940 // This should be in asm-ia64/kregs.h
941 #define IA64_TR_P2M_TABLE 3
943 static void
944 p2m_itr(void* info)
945 {
946 struct p2m_itr_arg* arg = (struct p2m_itr_arg*)info;
947 ia64_itr(0x2, IA64_TR_P2M_TABLE,
948 arg->vaddr, arg->pteval, arg->log_page_size);
949 ia64_srlz_d();
950 }
952 static int
953 p2m_expose_dtr_call(struct notifier_block *self,
954 unsigned long event, void* ptr)
955 {
956 unsigned int cpu = (unsigned int)(long)ptr;
957 if (event != CPU_ONLINE)
958 return 0;
959 if (p2m_initialized && xen_ia64_p2m_expose_use_dtr) {
960 unsigned int me = get_cpu();
961 if (cpu == me)
962 p2m_itr(&p2m_itr_arg);
963 else
964 smp_call_function_single(cpu, &p2m_itr, &p2m_itr_arg,
965 1, 1);
966 put_cpu();
967 }
968 return 0;
969 }
971 static struct notifier_block p2m_expose_dtr_hotplug_notifier = {
972 .notifier_call = p2m_expose_dtr_call,
973 .next = NULL,
974 .priority = 0
975 };
976 #endif
978 static int
979 p2m_expose_init(void)
980 {
981 unsigned long num_pfn;
982 unsigned long p2m_size = 0;
983 unsigned long align = ~0UL;
984 int error = 0;
985 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
986 int i;
987 unsigned long page_size;
988 unsigned long log_page_size = 0;
989 #endif
991 if (!xen_ia64_p2m_expose)
992 return -ENOSYS;
993 if (p2m_initialized)
994 return 0;
996 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
997 error = register_cpu_notifier(&p2m_expose_dtr_hotplug_notifier);
998 if (error < 0)
999 return error;
1000 #endif
1002 lock_cpu_hotplug();
1003 if (p2m_initialized)
1004 goto out;
1006 #ifdef CONFIG_DISCONTIGMEM
1007 p2m_min_low_pfn = min_low_pfn;
1008 p2m_max_low_pfn = max_low_pfn;
1009 #else
1010 p2m_min_low_pfn = 0;
1011 p2m_max_low_pfn = max_pfn;
1012 #endif
1014 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
1015 if (xen_ia64_p2m_expose_use_dtr) {
1016 unsigned long granule_pfn = 0;
1017 p2m_size = p2m_max_low_pfn - p2m_min_low_pfn;
1018 for (i = 0;
1019 i < sizeof(p2m_page_shifts)/sizeof(p2m_page_shifts[0]);
1020 i++) {
1021 log_page_size = p2m_page_shifts[i];
1022 page_size = 1UL << log_page_size;
1023 if (page_size < p2m_size)
1024 continue;
1026 granule_pfn = max(page_size >> PAGE_SHIFT,
1027 p2m_granule_pfn);
1028 p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn,
1029 granule_pfn);
1030 p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn,
1031 granule_pfn);
1032 num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn;
1033 p2m_expose_size = num_pfn << PAGE_SHIFT;
1034 p2m_size = num_pfn / PTRS_PER_PTE;
1035 p2m_size = ROUNDUP(p2m_size, granule_pfn << PAGE_SHIFT);
1036 if (p2m_size == page_size)
1037 break;
1038 }
1039 if (p2m_size != page_size) {
1040 printk(KERN_ERR "p2m_size != page_size\n");
1041 error = -EINVAL;
1042 goto out;
1043 }
1044 align = max(privcmd_resource_align, granule_pfn << PAGE_SHIFT);
1045 } else
1046 #endif
1047 {
1048 BUG_ON(p2m_granule_pfn & (p2m_granule_pfn - 1));
1049 p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn,
1050 p2m_granule_pfn);
1051 p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn, p2m_granule_pfn);
1052 num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn;
1053 p2m_expose_size = num_pfn << PAGE_SHIFT;
1054 p2m_size = num_pfn / PTRS_PER_PTE;
1055 p2m_size = ROUNDUP(p2m_size, p2m_granule_pfn << PAGE_SHIFT);
1056 align = max(privcmd_resource_align,
1057 p2m_granule_pfn << PAGE_SHIFT);
1058 }
1060 // use privcmd region
1061 error = allocate_resource(&iomem_resource, &p2m_resource, p2m_size,
1062 privcmd_resource_min, privcmd_resource_max,
1063 align, NULL, NULL);
1064 if (error) {
1065 printk(KERN_ERR P2M_PREFIX
1066 "can't allocate region for p2m exposure "
1067 "[0x%016lx, 0x%016lx) 0x%016lx\n",
1068 p2m_convert_min_pfn, p2m_convert_max_pfn, p2m_size);
1069 goto out;
1070 }
1072 p2m_assign_start_pfn = p2m_resource.start >> PAGE_SHIFT;
1073 p2m_assign_end_pfn = p2m_resource.end >> PAGE_SHIFT;
1075 error = HYPERVISOR_expose_p2m(p2m_convert_min_pfn,
1076 p2m_assign_start_pfn,
1077 p2m_expose_size, p2m_granule_pfn);
1078 if (error) {
1079 printk(KERN_ERR P2M_PREFIX "failed expose p2m hypercall %d\n",
1080 error);
1081 printk(KERN_ERR P2M_PREFIX "conv 0x%016lx assign 0x%016lx "
1082 "expose_size 0x%016lx granule 0x%016lx\n",
1083 p2m_convert_min_pfn, p2m_assign_start_pfn,
1084 p2m_expose_size, p2m_granule_pfn);
1085 release_resource(&p2m_resource);
1086 goto out;
1087 }
1088 p2m_pte = (volatile const pte_t*)pfn_to_kaddr(p2m_assign_start_pfn);
1089 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
1090 if (xen_ia64_p2m_expose_use_dtr) {
1091 p2m_itr_arg.vaddr = (unsigned long)__va(p2m_assign_start_pfn
1092 << PAGE_SHIFT);
1093 p2m_itr_arg.pteval = pte_val(pfn_pte(p2m_assign_start_pfn,
1094 PAGE_KERNEL));
1095 p2m_itr_arg.log_page_size = log_page_size;
1096 smp_mb();
1097 smp_call_function(&p2m_itr, &p2m_itr_arg, 1, 1);
1098 p2m_itr(&p2m_itr_arg);
1099 }
1100 #endif
1101 smp_mb();
1102 p2m_initialized = 1;
1103 printk(P2M_PREFIX "assign p2m table of [0x%016lx, 0x%016lx)\n",
1104 p2m_convert_min_pfn << PAGE_SHIFT,
1105 p2m_convert_max_pfn << PAGE_SHIFT);
1106 printk(P2M_PREFIX "to [0x%016lx, 0x%016lx) (%ld KBytes)\n",
1107 p2m_assign_start_pfn << PAGE_SHIFT,
1108 p2m_assign_end_pfn << PAGE_SHIFT,
1109 p2m_size / 1024);
1110 out:
1111 unlock_cpu_hotplug();
1112 return error;
1113 }
1115 #ifdef notyet
1116 void
1117 p2m_expose_cleanup(void)
1118 {
1119 BUG_ON(!p2m_initialized);
1120 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
1121 unregister_cpu_notifier(&p2m_expose_dtr_hotplug_notifier);
1122 #endif
1123 release_resource(&p2m_resource);
1124 }
1125 #endif
1127 static void
1128 p2m_expose_resume(void)
1129 {
1130 int error;
1132 if (!xen_ia64_p2m_expose || !p2m_initialized)
1133 return;
1135 /*
1136 * We can't call {lock, unlock}_cpu_hotplug() because
1137 * they require process context.
1138 * We don't need them because only one CPU is running and
1139 * interrupts are masked during resume.
1140 */
1141 error = HYPERVISOR_expose_p2m(p2m_convert_min_pfn,
1142 p2m_assign_start_pfn,
1143 p2m_expose_size, p2m_granule_pfn);
1144 if (error) {
1145 printk(KERN_ERR P2M_PREFIX "failed expose p2m hypercall %d\n",
1146 error);
1147 printk(KERN_ERR P2M_PREFIX "conv 0x%016lx assign 0x%016lx "
1148 "expose_size 0x%016lx granule 0x%016lx\n",
1149 p2m_convert_min_pfn, p2m_assign_start_pfn,
1150 p2m_expose_size, p2m_granule_pfn);
1151 p2m_initialized = 0;
1152 smp_mb();
1153 ia64_ptr(0x2, p2m_itr_arg.vaddr, p2m_itr_arg.log_page_size);
1155 /*
1156 * We can't call these cleanup functions because they
1157 * require process context.
1158 */
1159 #if 0
1160 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
1161 if (xen_ia64_p2m_expose_use_dtr)
1162 unregister_cpu_notifier(
1163 &p2m_expose_dtr_hotplug_notifier);
1164 #endif
1165 release_resource(&p2m_resource);
1166 #endif
1167 }
1168 }
1170 //XXX inlinize?
1171 unsigned long
1172 p2m_phystomach(unsigned long gpfn)
1173 {
1174 volatile const pte_t* pte;
1175 unsigned long mfn;
1176 unsigned long pteval;
1178 if (!p2m_initialized ||
1179 gpfn < p2m_min_low_pfn || gpfn > p2m_max_low_pfn
1180 /* || !pfn_valid(gpfn) */)
1181 return INVALID_MFN;
1182 pte = p2m_pte + (gpfn - p2m_convert_min_pfn);
1184 mfn = INVALID_MFN;
1185 if (likely(__get_user(pteval, (unsigned long __user *)pte) == 0 &&
1186 pte_present(__pte(pteval)) &&
1187 pte_pfn(__pte(pteval)) != (INVALID_MFN >> PAGE_SHIFT)))
1188 mfn = (pteval & _PFN_MASK) >> PAGE_SHIFT;
1190 return mfn;
1191 }
1193 EXPORT_SYMBOL_GPL(p2m_initialized);
1194 EXPORT_SYMBOL_GPL(p2m_min_low_pfn);
1195 EXPORT_SYMBOL_GPL(p2m_max_low_pfn);
1196 EXPORT_SYMBOL_GPL(p2m_convert_min_pfn);
1197 EXPORT_SYMBOL_GPL(p2m_convert_max_pfn);
1198 EXPORT_SYMBOL_GPL(p2m_pte);
1199 EXPORT_SYMBOL_GPL(p2m_phystomach);
1200 #endif
1202 ///////////////////////////////////////////////////////////////////////////
1203 // for xenoprof
1205 struct resource*
1206 xen_ia64_allocate_resource(unsigned long size)
1207 {
1208 struct resource* res;
1209 int error;
1211 res = kmalloc(sizeof(*res), GFP_KERNEL);
1212 if (res == NULL)
1213 return ERR_PTR(-ENOMEM);
1215 res->name = "Xen";
1216 res->flags = IORESOURCE_MEM;
1217 error = allocate_resource(&iomem_resource, res, PAGE_ALIGN(size),
1218 privcmd_resource_min, privcmd_resource_max,
1219 IA64_GRANULE_SIZE, NULL, NULL);
1220 if (error) {
1221 kfree(res);
1222 return ERR_PTR(error);
1223 }
1224 return res;
1225 }
1226 EXPORT_SYMBOL_GPL(xen_ia64_allocate_resource);
1228 void
1229 xen_ia64_release_resource(struct resource* res)
1230 {
1231 release_resource(res);
1232 kfree(res);
1233 }
1234 EXPORT_SYMBOL_GPL(xen_ia64_release_resource);
1236 void
1237 xen_ia64_unmap_resource(struct resource* res)
1238 {
1239 unsigned long gpfn = res->start >> PAGE_SHIFT;
1240 unsigned long nr_pages = (res->end - res->start) >> PAGE_SHIFT;
1241 unsigned long i;
1243 for (i = 0; i < nr_pages; i++) {
1244 int error = HYPERVISOR_zap_physmap(gpfn + i, 0);
1245 if (error)
1246 printk(KERN_ERR
1247 "%s:%d zap_phsymap failed %d gpfn %lx\n",
1248 __func__, __LINE__, error, gpfn + i);
1249 }
1250 xen_ia64_release_resource(res);
1251 }
1252 EXPORT_SYMBOL_GPL(xen_ia64_unmap_resource);
1254 ///////////////////////////////////////////////////////////////////////////
1255 // suspend/resume
1256 void
1257 xen_post_suspend(int suspend_cancelled)
1258 {
1259 if (suspend_cancelled)
1260 return;
1262 p2m_expose_resume();
1263 /* add more if necessary */
1264 }