ia64/xen-unstable

view linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c @ 12794:9787cb7262e8

[IA64] changed foreign domain page mapping semantic.

The x86 foreign HVM domain page mapping semantics were changed to use gmfn
instead of mfn. This applies to domains with auto_translated_mode enabled,
and all ia64 domains enable auto_translated_mode. This patch changes the
ia64 foreign domain page mapping to use gmfn and fixes the ia64 domU builder.
However, this patch breaks domain save/restore/dump-core;
they should also be fixed up.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author awilliam@xenbuild.aw
date Tue Dec 05 10:59:32 2006 -0700 (2006-12-05)
parents fe565ac4bf25
children 1f811fe10d0a
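As a rough sketch of the gmfn-based mapping flow this change introduces on the guest side, the fragment below mirrors the steps of xen_ia64_privcmd_entry_mmap() in the listing that follows; the helper name map_foreign_gmfn() and its error handling are illustrative assumptions, not part of the changeset, and the headers already included by this file are assumed.

/* Illustrative only: back a local gpfn with a foreign domain's page,
 * identified by gmfn (not a raw mfn, since all ia64 domains run in
 * auto_translated_mode), then map that gpfn into a user VMA. */
static int map_foreign_gmfn(struct vm_area_struct *vma, unsigned long addr,
                            unsigned long gpfn, unsigned long gmfn,
                            domid_t domid)
{
	int error;

	/* Ask Xen to populate the local gpfn from the foreign gmfn. */
	error = HYPERVISOR_add_physmap_with_gmfn(gpfn, gmfn,
	                                         ASSIGN_writable, domid);
	if (error != 0)
		return error;

	/* Map the now-populated gpfn into the caller's address space. */
	error = remap_pfn_range(vma, addr, gpfn, PAGE_SIZE, vma->vm_page_prot);
	if (error != 0)
		/* Roll back the physmap entry on failure. */
		HYPERVISOR_zap_physmap(gpfn, 0);
	return error;
}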
line source
1 /******************************************************************************
2 * arch/ia64/xen/hypervisor.c
3 *
4 * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
5 * VA Linux Systems Japan K.K.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 */
23 //#include <linux/kernel.h>
24 #include <linux/spinlock.h>
25 #include <linux/bootmem.h>
26 #include <linux/module.h>
27 #include <linux/vmalloc.h>
28 #include <asm/page.h>
29 #include <asm/hypervisor.h>
30 #include <asm/hypercall.h>
31 #include <xen/interface/memory.h>
32 #include <xen/balloon.h>
34 shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)XSI_BASE;
35 EXPORT_SYMBOL(HYPERVISOR_shared_info);
37 start_info_t *xen_start_info;
38 EXPORT_SYMBOL(xen_start_info);
40 int running_on_xen;
41 EXPORT_SYMBOL(running_on_xen);
43 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M
44 static int p2m_expose_init(void);
45 #else
46 #define p2m_expose_init() (-ENOSYS)
47 #endif
49 //XXX same as i386, x86_64 contiguous_bitmap_set(), contiguous_bitmap_clear()
50 // move those to lib/contiguous_bitmap?
51 //XXX discontigmem/sparsemem
53 /*
54 * Bitmap is indexed by page number. If bit is set, the page is part of a
55 * xen_create_contiguous_region() area of memory.
56 */
57 unsigned long *contiguous_bitmap;
59 void
60 contiguous_bitmap_init(unsigned long end_pfn)
61 {
62 unsigned long size = (end_pfn + 2 * BITS_PER_LONG) >> 3;
63 contiguous_bitmap = alloc_bootmem_low_pages(size);
64 BUG_ON(!contiguous_bitmap);
65 memset(contiguous_bitmap, 0, size);
66 }
68 #if 0
69 int
70 contiguous_bitmap_test(void* p)
71 {
72 return test_bit(__pa(p) >> PAGE_SHIFT, contiguous_bitmap);
73 }
74 #endif
76 static void contiguous_bitmap_set(
77 unsigned long first_page, unsigned long nr_pages)
78 {
79 unsigned long start_off, end_off, curr_idx, end_idx;
81 curr_idx = first_page / BITS_PER_LONG;
82 start_off = first_page & (BITS_PER_LONG-1);
83 end_idx = (first_page + nr_pages) / BITS_PER_LONG;
84 end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);
86 if (curr_idx == end_idx) {
87 contiguous_bitmap[curr_idx] |=
88 ((1UL<<end_off)-1) & -(1UL<<start_off);
89 } else {
90 contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
91 while ( ++curr_idx < end_idx )
92 contiguous_bitmap[curr_idx] = ~0UL;
93 contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
94 }
95 }
97 static void contiguous_bitmap_clear(
98 unsigned long first_page, unsigned long nr_pages)
99 {
100 unsigned long start_off, end_off, curr_idx, end_idx;
102 curr_idx = first_page / BITS_PER_LONG;
103 start_off = first_page & (BITS_PER_LONG-1);
104 end_idx = (first_page + nr_pages) / BITS_PER_LONG;
105 end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);
107 if (curr_idx == end_idx) {
108 contiguous_bitmap[curr_idx] &=
109 -(1UL<<end_off) | ((1UL<<start_off)-1);
110 } else {
111 contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
112 while ( ++curr_idx != end_idx )
113 contiguous_bitmap[curr_idx] = 0;
114 contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
115 }
116 }
118 // __xen_create_contiguous_region(), __xen_destroy_contiguous_region()
119 // are based on i386 xen_create_contiguous_region(),
120 // xen_destroy_contiguous_region()
122 /* Protected by balloon_lock. */
123 #define MAX_CONTIG_ORDER 7
124 static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
126 /* Ensure multi-page extents are contiguous in machine memory. */
127 int
128 __xen_create_contiguous_region(unsigned long vstart,
129 unsigned int order, unsigned int address_bits)
130 {
131 unsigned long error = 0;
132 unsigned long gphys = __pa(vstart);
133 unsigned long start_gpfn = gphys >> PAGE_SHIFT;
134 unsigned long num_gpfn = 1 << order;
135 unsigned long i;
136 unsigned long flags;
138 unsigned long *in_frames = discontig_frames, out_frame;
139 int success;
140 struct xen_memory_exchange exchange = {
141 .in = {
142 .nr_extents = num_gpfn,
143 .extent_order = 0,
144 .domid = DOMID_SELF
145 },
146 .out = {
147 .nr_extents = 1,
148 .extent_order = order,
149 .address_bits = address_bits,
150 .domid = DOMID_SELF
151 },
152 .nr_exchanged = 0
153 };
155 if (unlikely(order > MAX_CONTIG_ORDER))
156 return -ENOMEM;
158 set_xen_guest_handle(exchange.in.extent_start, in_frames);
159 set_xen_guest_handle(exchange.out.extent_start, &out_frame);
161 scrub_pages(vstart, num_gpfn);
163 balloon_lock(flags);
165 /* Get a new contiguous memory extent. */
166 for (i = 0; i < num_gpfn; i++) {
167 in_frames[i] = start_gpfn + i;
168 }
169 out_frame = start_gpfn;
170 error = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
171 success = (exchange.nr_exchanged == num_gpfn);
172 BUG_ON(!success && ((exchange.nr_exchanged != 0) || (error == 0)));
173 BUG_ON(success && (error != 0));
174 if (unlikely(error == -ENOSYS)) {
175 /* Compatibility when XENMEM_exchange is unsupported. */
176 error = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
177 &exchange.in);
178 BUG_ON(error != num_gpfn);
179 error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
180 &exchange.out);
181 if (error != 1) {
182 /* Couldn't get special memory: fall back to normal. */
183 for (i = 0; i < num_gpfn; i++) {
184 in_frames[i] = start_gpfn + i;
185 }
186 error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
187 &exchange.in);
188 BUG_ON(error != num_gpfn);
189 success = 0;
190 } else
191 success = 1;
192 }
193 if (success)
194 contiguous_bitmap_set(start_gpfn, num_gpfn);
195 #if 0
196 if (success) {
197 unsigned long mfn;
198 unsigned long mfn_prev = ~0UL;
199 for (i = 0; i < num_gpfn; i++) {
200 mfn = pfn_to_mfn_for_dma(start_gpfn + i);
201 if (mfn_prev != ~0UL && mfn != mfn_prev + 1) {
202 xprintk("\n");
203 xprintk("%s:%d order %d "
204 "start 0x%lx bus 0x%lx "
205 "machine 0x%lx\n",
206 __func__, __LINE__, order,
207 vstart, virt_to_bus((void*)vstart),
208 phys_to_machine_for_dma(gphys));
209 xprintk("mfn: ");
210 for (i = 0; i < num_gpfn; i++) {
211 mfn = pfn_to_mfn_for_dma(
212 start_gpfn + i);
213 xprintk("0x%lx ", mfn);
214 }
215 xprintk("\n");
216 break;
217 }
218 mfn_prev = mfn;
219 }
220 }
221 #endif
222 balloon_unlock(flags);
223 return success? 0: -ENOMEM;
224 }
226 void
227 __xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
228 {
229 unsigned long flags;
230 unsigned long error = 0;
231 unsigned long start_gpfn = __pa(vstart) >> PAGE_SHIFT;
232 unsigned long num_gpfn = 1UL << order;
233 unsigned long i;
235 unsigned long *out_frames = discontig_frames, in_frame;
236 int success;
237 struct xen_memory_exchange exchange = {
238 .in = {
239 .nr_extents = 1,
240 .extent_order = order,
241 .domid = DOMID_SELF
242 },
243 .out = {
244 .nr_extents = num_gpfn,
245 .extent_order = 0,
246 .address_bits = 0,
247 .domid = DOMID_SELF
248 },
249 .nr_exchanged = 0
250 };
253 if (!test_bit(start_gpfn, contiguous_bitmap))
254 return;
256 if (unlikely(order > MAX_CONTIG_ORDER))
257 return;
259 set_xen_guest_handle(exchange.in.extent_start, &in_frame);
260 set_xen_guest_handle(exchange.out.extent_start, out_frames);
262 scrub_pages(vstart, num_gpfn);
264 balloon_lock(flags);
266 contiguous_bitmap_clear(start_gpfn, num_gpfn);
268 /* Do the exchange for non-contiguous MFNs. */
269 in_frame = start_gpfn;
270 for (i = 0; i < num_gpfn; i++) {
271 out_frames[i] = start_gpfn + i;
272 }
273 error = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
274 success = (exchange.nr_exchanged == 1);
275 BUG_ON(!success && ((exchange.nr_exchanged != 0) || (error == 0)));
276 BUG_ON(success && (error != 0));
277 if (unlikely(error == -ENOSYS)) {
278 /* Compatibility when XENMEM_exchange is unsupported. */
279 error = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
280 &exchange.in);
281 BUG_ON(error != 1);
283 error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
284 &exchange.out);
285 BUG_ON(error != num_gpfn);
286 }
287 balloon_unlock(flags);
288 }
291 ///////////////////////////////////////////////////////////////////////////
292 // grant table hack
293 // cmd: GNTTABOP_xxx
295 #include <linux/mm.h>
296 #include <xen/interface/xen.h>
297 #include <xen/gnttab.h>
299 static void
300 gnttab_map_grant_ref_pre(struct gnttab_map_grant_ref *uop)
301 {
302 uint32_t flags;
304 flags = uop->flags;
306 if (flags & GNTMAP_host_map) {
307 if (flags & GNTMAP_application_map) {
308 xprintd("GNTMAP_application_map is not supported yet: flags 0x%x\n", flags);
309 BUG();
310 }
311 if (flags & GNTMAP_contains_pte) {
312 xprintd("GNTMAP_contains_pte is not supported yet flags 0x%x\n", flags);
313 BUG();
314 }
315 } else if (flags & GNTMAP_device_map) {
316 xprintd("GNTMAP_device_map is not supported yet 0x%x\n", flags);
317 BUG();//XXX not yet. actually this flag is not used.
318 } else {
319 BUG();
320 }
321 }
323 int
324 HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
325 {
326 if (cmd == GNTTABOP_map_grant_ref) {
327 unsigned int i;
328 for (i = 0; i < count; i++) {
329 gnttab_map_grant_ref_pre(
330 (struct gnttab_map_grant_ref*)uop + i);
331 }
332 }
333 return xencomm_mini_hypercall_grant_table_op(cmd, uop, count);
334 }
335 EXPORT_SYMBOL(HYPERVISOR_grant_table_op);
337 ///////////////////////////////////////////////////////////////////////////
338 // PageForeign(), SetPageForeign(), ClearPageForeign()
340 struct address_space xen_ia64_foreign_dummy_mapping;
341 EXPORT_SYMBOL(xen_ia64_foreign_dummy_mapping);
343 ///////////////////////////////////////////////////////////////////////////
344 // foreign mapping
345 #include <linux/efi.h>
346 #include <asm/meminit.h> // for IA64_GRANULE_SIZE, GRANULEROUND{UP,DOWN}()
348 static unsigned long privcmd_resource_min = 0;
349 // Xen/ia64 currently can handle pseudo physical address bits up to
350 // (PAGE_SHIFT * 3)
351 static unsigned long privcmd_resource_max = GRANULEROUNDDOWN((1UL << (PAGE_SHIFT * 3)) - 1);
352 static unsigned long privcmd_resource_align = IA64_GRANULE_SIZE;
354 static unsigned long
355 md_end_addr(const efi_memory_desc_t *md)
356 {
357 return md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
358 }
360 #define XEN_IA64_PRIVCMD_LEAST_GAP_SIZE (1024 * 1024 * 1024UL)
361 static int
362 xen_ia64_privcmd_check_size(unsigned long start, unsigned long end)
363 {
364 return (start < end &&
365 (end - start) > XEN_IA64_PRIVCMD_LEAST_GAP_SIZE);
366 }
368 static int __init
369 xen_ia64_privcmd_init(void)
370 {
371 void *efi_map_start, *efi_map_end, *p;
372 u64 efi_desc_size;
373 efi_memory_desc_t *md;
374 unsigned long tmp_min;
375 unsigned long tmp_max;
376 unsigned long gap_size;
377 unsigned long prev_end;
379 if (!is_running_on_xen())
380 return -1;
382 efi_map_start = __va(ia64_boot_param->efi_memmap);
383 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
384 efi_desc_size = ia64_boot_param->efi_memdesc_size;
386 // First, check the highest address in use.
387 for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
388 // nothing
389 }
390 md = p - efi_desc_size;
391 privcmd_resource_min = GRANULEROUNDUP(md_end_addr(md));
392 if (xen_ia64_privcmd_check_size(privcmd_resource_min,
393 privcmd_resource_max)) {
394 goto out;
395 }
397 // The highest address in use is too high; try to find the largest gap.
398 tmp_min = privcmd_resource_max;
399 tmp_max = 0;
400 gap_size = 0;
401 prev_end = 0;
402 for (p = efi_map_start;
403 p < efi_map_end - efi_desc_size;
404 p += efi_desc_size) {
405 unsigned long end;
406 efi_memory_desc_t* next;
407 unsigned long next_start;
409 md = p;
410 end = md_end_addr(md);
411 if (end > privcmd_resource_max) {
412 break;
413 }
414 if (end < prev_end) {
415 // work around.
416 // Xen may pass incompletely sorted memory
417 // descriptors like
418 // [x, x + length]
419 // [x, x]
420 // this order should be reversed.
421 continue;
422 }
423 next = p + efi_desc_size;
424 next_start = next->phys_addr;
425 if (next_start > privcmd_resource_max) {
426 next_start = privcmd_resource_max;
427 }
428 if (end < next_start && gap_size < (next_start - end)) {
429 tmp_min = end;
430 tmp_max = next_start;
431 gap_size = tmp_max - tmp_min;
432 }
433 prev_end = end;
434 }
436 privcmd_resource_min = GRANULEROUNDUP(tmp_min);
437 if (xen_ia64_privcmd_check_size(privcmd_resource_min, tmp_max)) {
438 privcmd_resource_max = tmp_max;
439 goto out;
440 }
442 privcmd_resource_min = tmp_min;
443 privcmd_resource_max = tmp_max;
444 if (!xen_ia64_privcmd_check_size(privcmd_resource_min,
445 privcmd_resource_max)) {
446 // No large enough gap was found.
447 // Go ahead anyway, with a warning, hoping that no large
448 // region will be requested.
449 printk(KERN_WARNING "xen privcmd: large enough region for privcmd mmap is not found.\n");
450 }
450 }
452 out:
453 printk(KERN_INFO "xen privcmd uses pseudo physical addr range [0x%lx, 0x%lx] (%ldMB)\n",
454 privcmd_resource_min, privcmd_resource_max,
455 (privcmd_resource_max - privcmd_resource_min) >> 20);
456 BUG_ON(privcmd_resource_min >= privcmd_resource_max);
458 // XXX this should be somewhere appropriate
459 (void)p2m_expose_init();
461 return 0;
462 }
463 late_initcall(xen_ia64_privcmd_init);
465 struct xen_ia64_privcmd_entry {
466 atomic_t map_count;
467 #define INVALID_GPFN (~0UL)
468 unsigned long gpfn;
469 };
471 struct xen_ia64_privcmd_range {
472 atomic_t ref_count;
473 unsigned long pgoff; // in PAGE_SIZE
474 struct resource* res;
476 unsigned long num_entries;
477 struct xen_ia64_privcmd_entry entries[0];
478 };
480 struct xen_ia64_privcmd_vma {
481 int is_privcmd_mmapped;
482 struct xen_ia64_privcmd_range* range;
484 unsigned long num_entries;
485 struct xen_ia64_privcmd_entry* entries;
486 };
488 static void
489 xen_ia64_privcmd_init_entry(struct xen_ia64_privcmd_entry* entry)
490 {
491 atomic_set(&entry->map_count, 0);
492 entry->gpfn = INVALID_GPFN;
493 }
495 static int
496 xen_ia64_privcmd_entry_mmap(struct vm_area_struct* vma,
497 unsigned long addr,
498 struct xen_ia64_privcmd_range* privcmd_range,
499 int i,
500 unsigned long gmfn,
501 pgprot_t prot,
502 domid_t domid)
503 {
504 int error = 0;
505 struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
506 unsigned long gpfn;
507 unsigned long flags;
509 if ((addr & ~PAGE_MASK) != 0 || gmfn == INVALID_MFN) {
510 error = -EINVAL;
511 goto out;
512 }
514 if (entry->gpfn != INVALID_GPFN) {
515 error = -EBUSY;
516 goto out;
517 }
518 gpfn = (privcmd_range->res->start >> PAGE_SHIFT) + i;
520 flags = ASSIGN_writable;
521 if (pgprot_val(prot) == PROT_READ) {
522 flags = ASSIGN_readonly;
523 }
524 error = HYPERVISOR_add_physmap_with_gmfn(gpfn, gmfn, flags, domid);
525 if (error != 0) {
526 goto out;
527 }
529 prot = vma->vm_page_prot;
530 error = remap_pfn_range(vma, addr, gpfn, 1 << PAGE_SHIFT, prot);
531 if (error != 0) {
532 error = HYPERVISOR_zap_physmap(gpfn, 0);
533 if (error) {
534 BUG();//XXX
535 }
536 } else {
537 atomic_inc(&entry->map_count);
538 entry->gpfn = gpfn;
539 }
541 out:
542 return error;
543 }
545 static void
546 xen_ia64_privcmd_entry_munmap(struct xen_ia64_privcmd_range* privcmd_range,
547 int i)
548 {
549 struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
550 unsigned long gpfn = entry->gpfn;
551 //gpfn = (privcmd_range->res->start >> PAGE_SHIFT) +
552 // (vma->vm_pgoff - privcmd_range->pgoff);
553 int error;
555 error = HYPERVISOR_zap_physmap(gpfn, 0);
556 if (error) {
557 BUG();//XXX
558 }
559 entry->gpfn = INVALID_GPFN;
560 }
562 static void
563 xen_ia64_privcmd_entry_open(struct xen_ia64_privcmd_range* privcmd_range,
564 int i)
565 {
566 struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
567 if (entry->gpfn != INVALID_GPFN) {
568 atomic_inc(&entry->map_count);
569 } else {
570 BUG_ON(atomic_read(&entry->map_count) != 0);
571 }
572 }
574 static void
575 xen_ia64_privcmd_entry_close(struct xen_ia64_privcmd_range* privcmd_range,
576 int i)
577 {
578 struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
579 if (entry->gpfn != INVALID_GPFN &&
580 atomic_dec_and_test(&entry->map_count)) {
581 xen_ia64_privcmd_entry_munmap(privcmd_range, i);
582 }
583 }
585 static void xen_ia64_privcmd_vma_open(struct vm_area_struct* vma);
586 static void xen_ia64_privcmd_vma_close(struct vm_area_struct* vma);
588 struct vm_operations_struct xen_ia64_privcmd_vm_ops = {
589 .open = &xen_ia64_privcmd_vma_open,
590 .close = &xen_ia64_privcmd_vma_close,
591 };
593 static void
594 __xen_ia64_privcmd_vma_open(struct vm_area_struct* vma,
595 struct xen_ia64_privcmd_vma* privcmd_vma,
596 struct xen_ia64_privcmd_range* privcmd_range)
597 {
598 unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
599 unsigned long num_entries = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
600 unsigned long i;
602 BUG_ON(entry_offset < 0);
603 BUG_ON(entry_offset + num_entries > privcmd_range->num_entries);
605 privcmd_vma->range = privcmd_range;
606 privcmd_vma->num_entries = num_entries;
607 privcmd_vma->entries = &privcmd_range->entries[entry_offset];
608 vma->vm_private_data = privcmd_vma;
609 for (i = 0; i < privcmd_vma->num_entries; i++) {
610 xen_ia64_privcmd_entry_open(privcmd_range, entry_offset + i);
611 }
613 vma->vm_private_data = privcmd_vma;
614 vma->vm_ops = &xen_ia64_privcmd_vm_ops;
615 }
617 static void
618 xen_ia64_privcmd_vma_open(struct vm_area_struct* vma)
619 {
620 struct xen_ia64_privcmd_vma* old_privcmd_vma = (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
621 struct xen_ia64_privcmd_vma* privcmd_vma = (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
622 struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
624 atomic_inc(&privcmd_range->ref_count);
625 // vm_op->open() can't fail.
626 privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL | __GFP_NOFAIL);
627 // copy original value if necessary
628 privcmd_vma->is_privcmd_mmapped = old_privcmd_vma->is_privcmd_mmapped;
630 __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range);
631 }
633 static void
634 xen_ia64_privcmd_vma_close(struct vm_area_struct* vma)
635 {
636 struct xen_ia64_privcmd_vma* privcmd_vma =
637 (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
638 struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
639 unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
640 unsigned long i;
642 for (i = 0; i < privcmd_vma->num_entries; i++) {
643 xen_ia64_privcmd_entry_close(privcmd_range, entry_offset + i);
644 }
645 vma->vm_private_data = NULL;
646 kfree(privcmd_vma);
648 if (atomic_dec_and_test(&privcmd_range->ref_count)) {
649 #if 1
650 for (i = 0; i < privcmd_range->num_entries; i++) {
651 struct xen_ia64_privcmd_entry* entry =
652 &privcmd_range->entries[i];
653 BUG_ON(atomic_read(&entry->map_count) != 0);
654 BUG_ON(entry->gpfn != INVALID_GPFN);
655 }
656 #endif
657 release_resource(privcmd_range->res);
658 kfree(privcmd_range->res);
659 vfree(privcmd_range);
660 }
661 }
663 int
664 privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
665 {
666 struct xen_ia64_privcmd_vma* privcmd_vma =
667 (struct xen_ia64_privcmd_vma *)vma->vm_private_data;
668 return (xchg(&privcmd_vma->is_privcmd_mmapped, 1) == 0);
669 }
671 int
672 privcmd_mmap(struct file * file, struct vm_area_struct * vma)
673 {
674 int error;
675 unsigned long size = vma->vm_end - vma->vm_start;
676 unsigned long num_entries = size >> PAGE_SHIFT;
677 struct xen_ia64_privcmd_range* privcmd_range = NULL;
678 struct xen_ia64_privcmd_vma* privcmd_vma = NULL;
679 struct resource* res = NULL;
680 unsigned long i;
681 BUG_ON(!is_running_on_xen());
683 BUG_ON(file->private_data != NULL);
685 error = -ENOMEM;
686 privcmd_range =
687 vmalloc(sizeof(*privcmd_range) +
688 sizeof(privcmd_range->entries[0]) * num_entries);
689 if (privcmd_range == NULL) {
690 goto out_enomem0;
691 }
692 privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL);
693 if (privcmd_vma == NULL) {
694 goto out_enomem1;
695 }
696 privcmd_vma->is_privcmd_mmapped = 0;
698 res = kzalloc(sizeof(*res), GFP_KERNEL);
699 if (res == NULL) {
700 goto out_enomem1;
701 }
702 res->name = "Xen privcmd mmap";
703 error = allocate_resource(&iomem_resource, res, size,
704 privcmd_resource_min, privcmd_resource_max,
705 privcmd_resource_align, NULL, NULL);
706 if (error) {
707 goto out_enomem1;
708 }
709 privcmd_range->res = res;
711 /* DONTCOPY is essential for Xen as copy_page_range is broken. */
712 vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
714 atomic_set(&privcmd_range->ref_count, 1);
715 privcmd_range->pgoff = vma->vm_pgoff;
716 privcmd_range->num_entries = num_entries;
717 for (i = 0; i < privcmd_range->num_entries; i++) {
718 xen_ia64_privcmd_init_entry(&privcmd_range->entries[i]);
719 }
721 __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range);
722 return 0;
724 out_enomem1:
725 kfree(res);
726 kfree(privcmd_vma);
727 out_enomem0:
728 vfree(privcmd_range);
729 return error;
730 }
732 int
733 direct_remap_pfn_range(struct vm_area_struct *vma,
734 unsigned long address, // process virtual address
735 unsigned long gmfn, // gmfn, gmfn + 1, ... gmfn + size/PAGE_SIZE
736 unsigned long size,
737 pgprot_t prot,
738 domid_t domid) // target domain
739 {
740 struct xen_ia64_privcmd_vma* privcmd_vma =
741 (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
742 struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
743 unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
745 unsigned long i;
746 unsigned long offset;
747 int error = 0;
748 BUG_ON(!is_running_on_xen());
750 #if 0
751 if (prot != vm->vm_page_prot) {
752 return -EINVAL;
753 }
754 #endif
756 i = (address - vma->vm_start) >> PAGE_SHIFT;
757 for (offset = 0; offset < size; offset += PAGE_SIZE) {
758 error = xen_ia64_privcmd_entry_mmap(vma, (address + offset) & PAGE_MASK, privcmd_range, entry_offset + i, gmfn, prot, domid);
759 if (error != 0) {
760 break;
761 }
763 i++;
764 gmfn++;
765 }
767 return error;
768 }
771 /* Called after suspend, to resume time. */
772 void
773 time_resume(void)
774 {
775 extern void ia64_cpu_local_tick(void);
777 /* Just trigger a tick. */
778 ia64_cpu_local_tick();
779 }
781 ///////////////////////////////////////////////////////////////////////////
782 // expose p2m table
783 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M
784 #include <linux/cpu.h>
785 #include <asm/uaccess.h>
787 int p2m_initialized __read_mostly = 0;
789 unsigned long p2m_min_low_pfn __read_mostly;
790 unsigned long p2m_max_low_pfn __read_mostly;
791 unsigned long p2m_convert_min_pfn __read_mostly;
792 unsigned long p2m_convert_max_pfn __read_mostly;
794 static struct resource p2m_resource = {
795 .name = "Xen p2m table",
796 .flags = IORESOURCE_MEM,
797 };
798 static unsigned long p2m_assign_start_pfn __read_mostly;
799 static unsigned long p2m_assign_end_pfn __read_mostly;
800 volatile const pte_t* p2m_pte __read_mostly;
802 #define GRANULE_PFN PTRS_PER_PTE
803 static unsigned long p2m_granule_pfn __read_mostly = GRANULE_PFN;
805 #define ROUNDDOWN(x, y) ((x) & ~((y) - 1))
806 #define ROUNDUP(x, y) (((x) + (y) - 1) & ~((y) - 1))
808 #define P2M_PREFIX "Xen p2m: "
810 static int xen_ia64_p2m_expose __read_mostly = 1;
811 module_param(xen_ia64_p2m_expose, int, 0);
812 MODULE_PARM_DESC(xen_ia64_p2m_expose,
813 "enable/disable xen/ia64 p2m exposure optimization\n");
815 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
816 static int xen_ia64_p2m_expose_use_dtr __read_mostly = 1;
817 module_param(xen_ia64_p2m_expose_use_dtr, int, 0);
818 MODULE_PARM_DESC(xen_ia64_p2m_expose_use_dtr,
819 "use/unuse dtr to map exposed p2m table\n");
821 static const int p2m_page_shifts[] = {
822 _PAGE_SIZE_4K,
823 _PAGE_SIZE_8K,
824 _PAGE_SIZE_16K,
825 _PAGE_SIZE_64K,
826 _PAGE_SIZE_256K,
827 _PAGE_SIZE_1M,
828 _PAGE_SIZE_4M,
829 _PAGE_SIZE_16M,
830 _PAGE_SIZE_64M,
831 _PAGE_SIZE_256M,
832 };
834 struct p2m_itr_arg {
835 unsigned long vaddr;
836 unsigned long pteval;
837 unsigned long log_page_size;
838 };
839 static struct p2m_itr_arg p2m_itr_arg __read_mostly;
841 // This should be in asm-ia64/kregs.h
842 #define IA64_TR_P2M_TABLE 3
844 static void
845 p2m_itr(void* info)
846 {
847 struct p2m_itr_arg* arg = (struct p2m_itr_arg*)info;
848 ia64_itr(0x2, IA64_TR_P2M_TABLE,
849 arg->vaddr, arg->pteval, arg->log_page_size);
850 ia64_srlz_d();
851 }
853 static int
854 p2m_expose_dtr_call(struct notifier_block *self,
855 unsigned long event, void* ptr)
856 {
857 unsigned int cpu = (unsigned int)(long)ptr;
858 if (event != CPU_ONLINE)
859 return 0;
860 if (!(p2m_initialized && xen_ia64_p2m_expose_use_dtr))
861 smp_call_function_single(cpu, &p2m_itr, &p2m_itr_arg, 1, 1);
862 return 0;
863 }
865 static struct notifier_block p2m_expose_dtr_hotplug_notifier = {
866 .notifier_call = p2m_expose_dtr_call,
867 .next = NULL,
868 .priority = 0
869 };
870 #endif
872 static int
873 p2m_expose_init(void)
874 {
875 unsigned long num_pfn;
876 unsigned long size = 0;
877 unsigned long p2m_size = 0;
878 unsigned long align = ~0UL;
879 int error = 0;
880 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
881 int i;
882 unsigned long page_size;
883 unsigned long log_page_size = 0;
884 #endif
886 if (!xen_ia64_p2m_expose)
887 return -ENOSYS;
888 if (p2m_initialized)
889 return 0;
891 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
892 error = register_cpu_notifier(&p2m_expose_dtr_hotplug_notifier);
893 if (error < 0)
894 return error;
895 #endif
897 lock_cpu_hotplug();
898 if (p2m_initialized)
899 goto out;
901 #ifdef CONFIG_DISCONTIGMEM
902 p2m_min_low_pfn = min_low_pfn;
903 p2m_max_low_pfn = max_low_pfn;
904 #else
905 p2m_min_low_pfn = 0;
906 p2m_max_low_pfn = max_pfn;
907 #endif
909 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
910 if (xen_ia64_p2m_expose_use_dtr) {
911 unsigned long granule_pfn = 0;
912 p2m_size = p2m_max_low_pfn - p2m_min_low_pfn;
913 for (i = 0;
914 i < sizeof(p2m_page_shifts)/sizeof(p2m_page_shifts[0]);
915 i++) {
916 log_page_size = p2m_page_shifts[i];
917 page_size = 1UL << log_page_size;
918 if (page_size < p2m_size)
919 continue;
921 granule_pfn = max(page_size >> PAGE_SHIFT,
922 p2m_granule_pfn);
923 p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn,
924 granule_pfn);
925 p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn,
926 granule_pfn);
927 num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn;
928 size = num_pfn << PAGE_SHIFT;
929 p2m_size = num_pfn / PTRS_PER_PTE;
930 p2m_size = ROUNDUP(p2m_size, granule_pfn << PAGE_SHIFT);
931 if (p2m_size == page_size)
932 break;
933 }
934 if (p2m_size != page_size) {
935 printk(KERN_ERR "p2m_size != page_size\n");
936 error = -EINVAL;
937 goto out;
938 }
939 align = max(privcmd_resource_align, granule_pfn << PAGE_SHIFT);
940 } else
941 #endif
942 {
943 BUG_ON(p2m_granule_pfn & (p2m_granule_pfn - 1));
944 p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn,
945 p2m_granule_pfn);
946 p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn, p2m_granule_pfn);
947 num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn;
948 size = num_pfn << PAGE_SHIFT;
949 p2m_size = num_pfn / PTRS_PER_PTE;
950 p2m_size = ROUNDUP(p2m_size, p2m_granule_pfn << PAGE_SHIFT);
951 align = max(privcmd_resource_align,
952 p2m_granule_pfn << PAGE_SHIFT);
953 }
955 // use privcmd region
956 error = allocate_resource(&iomem_resource, &p2m_resource, p2m_size,
957 privcmd_resource_min, privcmd_resource_max,
958 align, NULL, NULL);
959 if (error) {
960 printk(KERN_ERR P2M_PREFIX
961 "can't allocate region for p2m exposure "
962 "[0x%016lx, 0x%016lx) 0x%016lx\n",
963 p2m_convert_min_pfn, p2m_convert_max_pfn, p2m_size);
964 goto out;
965 }
967 p2m_assign_start_pfn = p2m_resource.start >> PAGE_SHIFT;
968 p2m_assign_end_pfn = p2m_resource.end >> PAGE_SHIFT;
970 error = HYPERVISOR_expose_p2m(p2m_convert_min_pfn,
971 p2m_assign_start_pfn,
972 size, p2m_granule_pfn);
973 if (error) {
974 printk(KERN_ERR P2M_PREFIX "failed expose p2m hypercall %d\n",
975 error);
976 printk(KERN_ERR P2M_PREFIX "conv 0x%016lx assign 0x%016lx "
977 "size 0x%016lx granule 0x%016lx\n",
978 p2m_convert_min_pfn, p2m_assign_start_pfn,
979 size, p2m_granule_pfn);
980 release_resource(&p2m_resource);
981 goto out;
982 }
983 p2m_pte = (volatile const pte_t*)pfn_to_kaddr(p2m_assign_start_pfn);
984 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
985 if (xen_ia64_p2m_expose_use_dtr) {
986 p2m_itr_arg.vaddr = (unsigned long)__va(p2m_assign_start_pfn
987 << PAGE_SHIFT);
988 p2m_itr_arg.pteval = pte_val(pfn_pte(p2m_assign_start_pfn,
989 PAGE_KERNEL));
990 p2m_itr_arg.log_page_size = log_page_size;
991 smp_mb();
992 smp_call_function(&p2m_itr, &p2m_itr_arg, 1, 1);
993 p2m_itr(&p2m_itr_arg);
994 }
995 #endif
996 smp_mb();
997 p2m_initialized = 1;
998 printk(P2M_PREFIX "assign p2m table of [0x%016lx, 0x%016lx)\n",
999 p2m_convert_min_pfn << PAGE_SHIFT,
1000 p2m_convert_max_pfn << PAGE_SHIFT);
1001 printk(P2M_PREFIX "to [0x%016lx, 0x%016lx) (%ld KBytes)\n",
1002 p2m_assign_start_pfn << PAGE_SHIFT,
1003 p2m_assign_end_pfn << PAGE_SHIFT,
1004 p2m_size / 1024);
1005 out:
1006 unlock_cpu_hotplug();
1007 return error;
1008 }
1010 #ifdef notyet
1011 void
1012 p2m_expose_cleanup(void)
1013 {
1014 BUG_ON(!p2m_initialized);
1015 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
1016 unregister_cpu_notifier(&p2m_expose_dtr_hotplug_notifier);
1017 #endif
1018 release_resource(&p2m_resource);
1019 }
1020 #endif
1022 //XXX inlinize?
1023 unsigned long
1024 p2m_phystomach(unsigned long gpfn)
1025 {
1026 volatile const pte_t* pte;
1027 unsigned long mfn;
1028 unsigned long pteval;
1030 if (!p2m_initialized ||
1031 gpfn < p2m_min_low_pfn || gpfn > p2m_max_low_pfn
1032 /* || !pfn_valid(gpfn) */)
1033 return INVALID_MFN;
1034 pte = p2m_pte + (gpfn - p2m_convert_min_pfn);
1036 mfn = INVALID_MFN;
1037 if (likely(__get_user(pteval, (unsigned long __user *)pte) == 0 &&
1038 pte_present(__pte(pteval)) &&
1039 pte_pfn(__pte(pteval)) != (INVALID_MFN >> PAGE_SHIFT)))
1040 mfn = (pteval & _PFN_MASK) >> PAGE_SHIFT;
1042 return mfn;
1043 }
1045 EXPORT_SYMBOL_GPL(p2m_initialized);
1046 EXPORT_SYMBOL_GPL(p2m_min_low_pfn);
1047 EXPORT_SYMBOL_GPL(p2m_max_low_pfn);
1048 EXPORT_SYMBOL_GPL(p2m_convert_min_pfn);
1049 EXPORT_SYMBOL_GPL(p2m_convert_max_pfn);
1050 EXPORT_SYMBOL_GPL(p2m_pte);
1051 EXPORT_SYMBOL_GPL(p2m_phystomach);
1052 #endif
1054 ///////////////////////////////////////////////////////////////////////////
1055 // for xenoprof
1057 struct resource*
1058 xen_ia64_allocate_resource(unsigned long size)
1059 {
1060 struct resource* res;
1061 int error;
1063 res = kmalloc(sizeof(*res), GFP_KERNEL);
1064 if (res == NULL)
1065 return ERR_PTR(-ENOMEM);
1067 res->name = "Xen";
1068 res->flags = IORESOURCE_MEM;
1069 error = allocate_resource(&iomem_resource, res, PAGE_ALIGN(size),
1070 privcmd_resource_min, privcmd_resource_max,
1071 IA64_GRANULE_SIZE, NULL, NULL);
1072 if (error) {
1073 kfree(res);
1074 return ERR_PTR(error);
1075 }
1076 return res;
1077 }
1078 EXPORT_SYMBOL_GPL(xen_ia64_allocate_resource);
1080 void
1081 xen_ia64_release_resource(struct resource* res)
1082 {
1083 release_resource(res);
1084 kfree(res);
1085 }
1086 EXPORT_SYMBOL_GPL(xen_ia64_release_resource);
1088 void
1089 xen_ia64_unmap_resource(struct resource* res)
1090 {
1091 unsigned long gpfn = res->start >> PAGE_SHIFT;
1092 unsigned long nr_pages = (res->end - res->start) >> PAGE_SHIFT;
1093 unsigned long i;
1095 for (i = 0; i < nr_pages; i++) {
1096 int error = HYPERVISOR_zap_physmap(gpfn + i, 0);
1097 if (error)
1098 printk(KERN_ERR
1099 "%s:%d zap_physmap failed %d gpfn %lx\n",
1100 __func__, __LINE__, error, gpfn + i);
1101 }
1102 xen_ia64_release_resource(res);
1103 }
1104 EXPORT_SYMBOL_GPL(xen_ia64_unmap_resource);
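A minimal usage sketch for the xenoprof resource helpers above; the names example_setup()/example_teardown() and the 16-page size are assumptions made for illustration, and IS_ERR()/PTR_ERR() from <linux/err.h> are assumed to be available.

/* Illustrative only: a xenoprof-style client of xen_ia64_allocate_resource()
 * and xen_ia64_unmap_resource(). */
static struct resource *example_res;

static int example_setup(void)
{
	example_res = xen_ia64_allocate_resource(16 * PAGE_SIZE);
	if (IS_ERR(example_res))
		return PTR_ERR(example_res);
	/* Xen would then be asked to populate this pseudo-physical region,
	 * after which it can be accessed through __va(example_res->start). */
	return 0;
}

static void example_teardown(void)
{
	/* Zaps any granted frames in the region and frees the resource. */
	xen_ia64_unmap_resource(example_res);
}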