ia64/xen-unstable

linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c @ 11727:d1d9f3f6ca09

[IA64] p2m exposure: Linux-side part.

This patch introduces the compile-time option XEN_IA64_EXPOSE_P2M
to enable this feature and the boot option xen_ia64_p2m_expose to
disable the functionality at runtime.
It also introduces XEN_IA64_EXPOSE_P2M_USE_DTR to map the p2m table
with a dtr (data translation register) and the boot option
xen_ia64_p2m_expose_use_dtr to disable that mapping.
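
For example, assuming the usual name=value form for built-in module
parameters on the kernel command line, both options could be turned off
at boot with:

    xen_ia64_p2m_expose=0 xen_ia64_p2m_expose_use_dtr=0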

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author awilliam@xenbuild.aw
date Wed Oct 04 22:12:29 2006 -0600 (2006-10-04)
parents 5c97ef4c7147
children 3e9fcbee3c09
line source
1 /******************************************************************************
2 * arch/ia64/xen/hypervisor.c
3 *
4 * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
5 * VA Linux Systems Japan K.K.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 */
23 //#include <linux/kernel.h>
24 #include <linux/spinlock.h>
25 #include <linux/bootmem.h>
26 #include <linux/module.h>
27 #include <linux/vmalloc.h>
28 #include <asm/page.h>
29 #include <asm/hypervisor.h>
30 #include <asm/hypercall.h>
31 #include <xen/interface/memory.h>
32 #include <xen/balloon.h>
34 shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)XSI_BASE;
35 EXPORT_SYMBOL(HYPERVISOR_shared_info);
37 start_info_t *xen_start_info;
38 EXPORT_SYMBOL(xen_start_info);
40 int running_on_xen;
41 EXPORT_SYMBOL(running_on_xen);
43 static int p2m_expose_init(void);
45 //XXX same as i386, x86_64 contiguous_bitmap_set(), contiguous_bitmap_clear()
46 // move those to lib/contiguous_bitmap?
47 //XXX discontigmem/sparsemem
49 /*
50 * Bitmap is indexed by page number. If bit is set, the page is part of a
51 * xen_create_contiguous_region() area of memory.
52 */
53 unsigned long *contiguous_bitmap;
55 void
56 contiguous_bitmap_init(unsigned long end_pfn)
57 {
58 unsigned long size = (end_pfn + 2 * BITS_PER_LONG) >> 3;
59 contiguous_bitmap = alloc_bootmem_low_pages(size);
60 BUG_ON(!contiguous_bitmap);
61 memset(contiguous_bitmap, 0, size);
62 }
64 #if 0
65 int
66 contiguous_bitmap_test(void* p)
67 {
68 return test_bit(__pa(p) >> PAGE_SHIFT, contiguous_bitmap);
69 }
70 #endif
72 static void contiguous_bitmap_set(
73 unsigned long first_page, unsigned long nr_pages)
74 {
75 unsigned long start_off, end_off, curr_idx, end_idx;
77 curr_idx = first_page / BITS_PER_LONG;
78 start_off = first_page & (BITS_PER_LONG-1);
79 end_idx = (first_page + nr_pages) / BITS_PER_LONG;
80 end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);
82 if (curr_idx == end_idx) {
83 contiguous_bitmap[curr_idx] |=
84 ((1UL<<end_off)-1) & -(1UL<<start_off);
85 } else {
86 contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
87 while ( ++curr_idx < end_idx )
88 contiguous_bitmap[curr_idx] = ~0UL;
89 contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
90 }
91 }
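/*
 * Example (assuming BITS_PER_LONG == 64): contiguous_bitmap_set(3, 2)
 * gives curr_idx = end_idx = 0, start_off = 3, end_off = 5, so word 0
 * is OR-ed with ((1UL<<5)-1) & -(1UL<<3) = 0x18, i.e. bits 3 and 4.
 * A range crossing a word boundary fills the partial first word, any
 * whole middle words, then the partial last word.
 */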
93 static void contiguous_bitmap_clear(
94 unsigned long first_page, unsigned long nr_pages)
95 {
96 unsigned long start_off, end_off, curr_idx, end_idx;
98 curr_idx = first_page / BITS_PER_LONG;
99 start_off = first_page & (BITS_PER_LONG-1);
100 end_idx = (first_page + nr_pages) / BITS_PER_LONG;
101 end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);
103 if (curr_idx == end_idx) {
104 contiguous_bitmap[curr_idx] &=
105 -(1UL<<end_off) | ((1UL<<start_off)-1);
106 } else {
107 contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
108 while ( ++curr_idx != end_idx )
109 contiguous_bitmap[curr_idx] = 0;
110 contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
111 }
112 }
114 // __xen_create_contiguous_region(), __xen_destroy_contiguous_region()
115 // are based on i386 xen_create_contiguous_region(),
116 // xen_destroy_contiguous_region()
118 /* Protected by balloon_lock. */
119 #define MAX_CONTIG_ORDER 7
120 static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
122 /* Ensure multi-page extents are contiguous in machine memory. */
123 int
124 __xen_create_contiguous_region(unsigned long vstart,
125 unsigned int order, unsigned int address_bits)
126 {
127 unsigned long error = 0;
128 unsigned long gphys = __pa(vstart);
129 unsigned long start_gpfn = gphys >> PAGE_SHIFT;
130 unsigned long num_gpfn = 1 << order;
131 unsigned long i;
132 unsigned long flags;
134 unsigned long *in_frames = discontig_frames, out_frame;
135 int success;
136 struct xen_memory_exchange exchange = {
137 .in = {
138 .nr_extents = num_gpfn,
139 .extent_order = 0,
140 .domid = DOMID_SELF
141 },
142 .out = {
143 .nr_extents = 1,
144 .extent_order = order,
145 .address_bits = address_bits,
146 .domid = DOMID_SELF
147 },
148 .nr_exchanged = 0
149 };
151 if (unlikely(order > MAX_CONTIG_ORDER))
152 return -ENOMEM;
154 set_xen_guest_handle(exchange.in.extent_start, in_frames);
155 set_xen_guest_handle(exchange.out.extent_start, &out_frame);
157 scrub_pages(vstart, num_gpfn);
159 balloon_lock(flags);
161 /* Get a new contiguous memory extent. */
162 for (i = 0; i < num_gpfn; i++) {
163 in_frames[i] = start_gpfn + i;
164 }
165 out_frame = start_gpfn;
166 error = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
167 success = (exchange.nr_exchanged == num_gpfn);
168 BUG_ON(!success && ((exchange.nr_exchanged != 0) || (error == 0)));
169 BUG_ON(success && (error != 0));
170 if (unlikely(error == -ENOSYS)) {
171 /* Compatibility when XENMEM_exchange is unsupported. */
172 error = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
173 &exchange.in);
174 BUG_ON(error != num_gpfn);
175 error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
176 &exchange.out);
177 if (error != 1) {
178 /* Couldn't get special memory: fall back to normal. */
179 for (i = 0; i < num_gpfn; i++) {
180 in_frames[i] = start_gpfn + i;
181 }
182 error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
183 &exchange.in);
184 BUG_ON(error != num_gpfn);
185 success = 0;
186 } else
187 success = 1;
188 }
189 if (success)
190 contiguous_bitmap_set(start_gpfn, num_gpfn);
191 #if 0
192 if (success) {
193 unsigned long mfn;
194 unsigned long mfn_prev = ~0UL;
195 for (i = 0; i < num_gpfn; i++) {
196 mfn = pfn_to_mfn_for_dma(start_gpfn + i);
197 if (mfn_prev != ~0UL && mfn != mfn_prev + 1) {
198 xprintk("\n");
199 xprintk("%s:%d order %d "
200 "start 0x%lx bus 0x%lx "
201 "machine 0x%lx\n",
202 __func__, __LINE__, order,
203 vstart, virt_to_bus((void*)vstart),
204 phys_to_machine_for_dma(gphys));
205 xprintk("mfn: ");
206 for (i = 0; i < num_gpfn; i++) {
207 mfn = pfn_to_mfn_for_dma(
208 start_gpfn + i);
209 xprintk("0x%lx ", mfn);
210 }
211 xprintk("\n");
212 break;
213 }
214 mfn_prev = mfn;
215 }
216 }
217 #endif
218 balloon_unlock(flags);
219 return success? 0: -ENOMEM;
220 }
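/*
 * Summary: the XENMEM_exchange above trades the 2^order single-page
 * extents backing [vstart, vstart + (2^order << PAGE_SHIFT)) for one
 * machine-contiguous extent of order 'order', reassigned to the same
 * guest pfns, and marks the range in contiguous_bitmap on success.
 * When the hypervisor lacks XENMEM_exchange (-ENOSYS), the frames are
 * released with XENMEM_decrease_reservation and repopulated with
 * XENMEM_populate_physmap; if the contiguous allocation fails, the
 * original (possibly discontiguous) frames are repopulated and
 * -ENOMEM is returned.
 */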
222 void
223 __xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
224 {
225 unsigned long flags;
226 unsigned long error = 0;
227 unsigned long start_gpfn = __pa(vstart) >> PAGE_SHIFT;
228 unsigned long num_gpfn = 1UL << order;
229 unsigned long i;
231 unsigned long *out_frames = discontig_frames, in_frame;
232 int success;
233 struct xen_memory_exchange exchange = {
234 .in = {
235 .nr_extents = 1,
236 .extent_order = order,
237 .domid = DOMID_SELF
238 },
239 .out = {
240 .nr_extents = num_gpfn,
241 .extent_order = 0,
242 .address_bits = 0,
243 .domid = DOMID_SELF
244 },
245 .nr_exchanged = 0
246 };
249 if (!test_bit(start_gpfn, contiguous_bitmap))
250 return;
252 if (unlikely(order > MAX_CONTIG_ORDER))
253 return;
255 set_xen_guest_handle(exchange.in.extent_start, &in_frame);
256 set_xen_guest_handle(exchange.out.extent_start, out_frames);
258 scrub_pages(vstart, num_gpfn);
260 balloon_lock(flags);
262 contiguous_bitmap_clear(start_gpfn, num_gpfn);
264 /* Do the exchange for non-contiguous MFNs. */
265 in_frame = start_gpfn;
266 for (i = 0; i < num_gpfn; i++) {
267 out_frames[i] = start_gpfn + i;
268 }
269 error = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
270 success = (exchange.nr_exchanged == 1);
271 BUG_ON(!success && ((exchange.nr_exchanged != 0) || (error == 0)));
272 BUG_ON(success && (error != 0));
273 if (unlikely(error == -ENOSYS)) {
274 /* Compatibility when XENMEM_exchange is unsupported. */
275 error = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
276 &exchange.in);
277 BUG_ON(error != 1);
279 error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
280 &exchange.out);
281 BUG_ON(error != num_gpfn);
282 }
283 balloon_unlock(flags);
284 }
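/*
 * Summary: the reverse of __xen_create_contiguous_region().  The range
 * is cleared from contiguous_bitmap, then the single order-'order'
 * extent is exchanged back for 2^order individual frames at the same
 * guest pfns, with the same XENMEM_decrease_reservation /
 * XENMEM_populate_physmap fallback when XENMEM_exchange is unsupported.
 */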
287 ///////////////////////////////////////////////////////////////////////////
288 // grant table hack
289 // cmd: GNTTABOP_xxx
291 #include <linux/mm.h>
292 #include <xen/interface/xen.h>
293 #include <xen/gnttab.h>
295 static void
296 gnttab_map_grant_ref_pre(struct gnttab_map_grant_ref *uop)
297 {
298 uint32_t flags;
300 flags = uop->flags;
302 if (flags & GNTMAP_host_map) {
303 if (flags & GNTMAP_application_map) {
304 xprintd("GNTMAP_application_map is not supported yet: flags 0x%x\n", flags);
305 BUG();
306 }
307 if (flags & GNTMAP_contains_pte) {
308 xprintd("GNTMAP_contains_pte is not supported yet flags 0x%x\n", flags);
309 BUG();
310 }
311 } else if (flags & GNTMAP_device_map) {
312 xprintd("GNTMAP_device_map is not supported yet 0x%x\n", flags);
313 BUG();//XXX not yet. actually this flag is not used.
314 } else {
315 BUG();
316 }
317 }
319 int
320 HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
321 {
322 if (cmd == GNTTABOP_map_grant_ref) {
323 unsigned int i;
324 for (i = 0; i < count; i++) {
325 gnttab_map_grant_ref_pre(
326 (struct gnttab_map_grant_ref*)uop + i);
327 }
328 }
329 return xencomm_mini_hypercall_grant_table_op(cmd, uop, count);
330 }
331 EXPORT_SYMBOL(HYPERVISOR_grant_table_op);
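/*
 * HYPERVISOR_grant_table_op() checks map requests for flags that the
 * xen/ia64 implementation does not support yet (application maps,
 * pte-based maps, device maps) before forwarding the operation to the
 * hypervisor through xencomm.
 */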
333 ///////////////////////////////////////////////////////////////////////////
334 // PageForeign(), SetPageForeign(), ClearPageForeign()
336 struct address_space xen_ia64_foreign_dummy_mapping;
337 EXPORT_SYMBOL(xen_ia64_foreign_dummy_mapping);
339 ///////////////////////////////////////////////////////////////////////////
340 // foreign mapping
341 #include <linux/efi.h>
342 #include <asm/meminit.h> // for IA64_GRANULE_SIZE, GRANULEROUND{UP,DOWN}()
344 static unsigned long privcmd_resource_min = 0;
345 // Xen/ia64 currently can handle pseudo physical address bits up to
346 // (PAGE_SHIFT * 3)
347 static unsigned long privcmd_resource_max = GRANULEROUNDDOWN((1UL << (PAGE_SHIFT * 3)) - 1);
348 static unsigned long privcmd_resource_align = IA64_GRANULE_SIZE;
350 static unsigned long
351 md_end_addr(const efi_memory_desc_t *md)
352 {
353 return md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
354 }
356 #define XEN_IA64_PRIVCMD_LEAST_GAP_SIZE (1024 * 1024 * 1024UL)
357 static int
358 xen_ia64_privcmd_check_size(unsigned long start, unsigned long end)
359 {
360 return (start < end &&
361 (end - start) > XEN_IA64_PRIVCMD_LEAST_GAP_SIZE);
362 }
364 static int __init
365 xen_ia64_privcmd_init(void)
366 {
367 void *efi_map_start, *efi_map_end, *p;
368 u64 efi_desc_size;
369 efi_memory_desc_t *md;
370 unsigned long tmp_min;
371 unsigned long tmp_max;
372 unsigned long gap_size;
373 unsigned long prev_end;
375 if (!is_running_on_xen())
376 return -1;
378 efi_map_start = __va(ia64_boot_param->efi_memmap);
379 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
380 efi_desc_size = ia64_boot_param->efi_memdesc_size;
382 // First, check the highest address in use.
383 for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
384 // nothing
385 }
386 md = p - efi_desc_size;
387 privcmd_resource_min = GRANULEROUNDUP(md_end_addr(md));
388 if (xen_ia64_privcmd_check_size(privcmd_resource_min,
389 privcmd_resource_max)) {
390 goto out;
391 }
393 // The highest used address leaves too little room; look for the largest gap instead.
394 tmp_min = privcmd_resource_max;
395 tmp_max = 0;
396 gap_size = 0;
397 prev_end = 0;
398 for (p = efi_map_start;
399 p < efi_map_end - efi_desc_size;
400 p += efi_desc_size) {
401 unsigned long end;
402 efi_memory_desc_t* next;
403 unsigned long next_start;
405 md = p;
406 end = md_end_addr(md);
407 if (end > privcmd_resource_max) {
408 break;
409 }
410 if (end < prev_end) {
411 // Work around:
412 // Xen may pass incompletely sorted memory
413 // descriptors, e.g.
414 // [x, x + length]
415 // [x, x]
416 // where this order should be reversed.
417 continue;
418 }
419 next = p + efi_desc_size;
420 next_start = next->phys_addr;
421 if (next_start > privcmd_resource_max) {
422 next_start = privcmd_resource_max;
423 }
424 if (end < next_start && gap_size < (next_start - end)) {
425 tmp_min = end;
426 tmp_max = next_start;
427 gap_size = tmp_max - tmp_min;
428 }
429 prev_end = end;
430 }
432 privcmd_resource_min = GRANULEROUNDUP(tmp_min);
433 if (xen_ia64_privcmd_check_size(privcmd_resource_min, tmp_max)) {
434 privcmd_resource_max = tmp_max;
435 goto out;
436 }
438 privcmd_resource_min = tmp_min;
439 privcmd_resource_max = tmp_max;
440 if (!xen_ia64_privcmd_check_size(privcmd_resource_min,
441 privcmd_resource_max)) {
442 // No large enough gap was found.
443 // Go ahead anyway with a warning, hoping that no large
444 // region will be requested.
445 printk(KERN_WARNING "xen privcmd: no large enough region found for privcmd mmap.\n");
446 }
448 out:
449 printk(KERN_INFO "xen privcmd uses pseudo physical addr range [0x%lx, 0x%lx] (%ldMB)\n",
450 privcmd_resource_min, privcmd_resource_max,
451 (privcmd_resource_max - privcmd_resource_min) >> 20);
452 BUG_ON(privcmd_resource_min >= privcmd_resource_max);
454 // XXX this should be somewhere appropriate
455 (void)p2m_expose_init();
457 return 0;
458 }
459 late_initcall(xen_ia64_privcmd_init);
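/*
 * xen_ia64_privcmd_init() walks the EFI memory map to pick a pseudo-
 * physical address range that the machine does not use, for privcmd
 * foreign mappings: preferably everything above the highest descriptor,
 * otherwise the largest gap between descriptors.  If no range of at
 * least XEN_IA64_PRIVCMD_LEAST_GAP_SIZE is found, it only warns and
 * continues.
 */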
461 struct xen_ia64_privcmd_entry {
462 atomic_t map_count;
463 #define INVALID_GPFN (~0UL)
464 unsigned long gpfn;
465 };
467 struct xen_ia64_privcmd_range {
468 atomic_t ref_count;
469 unsigned long pgoff; // in PAGE_SIZE
470 struct resource* res;
472 unsigned long num_entries;
473 struct xen_ia64_privcmd_entry entries[0];
474 };
476 struct xen_ia64_privcmd_vma {
477 struct xen_ia64_privcmd_range* range;
479 unsigned long num_entries;
480 struct xen_ia64_privcmd_entry* entries;
481 };
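/*
 * A xen_ia64_privcmd_range covers one allocated resource region and is
 * shared (ref_count) by every vma that maps part of it; it holds one
 * xen_ia64_privcmd_entry per page, tracking the assigned gpfn and a
 * per-page map_count.  A xen_ia64_privcmd_vma is a single vma's window
 * into that range.
 */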
483 static void
484 xen_ia64_privcmd_init_entry(struct xen_ia64_privcmd_entry* entry)
485 {
486 atomic_set(&entry->map_count, 0);
487 entry->gpfn = INVALID_GPFN;
488 }
490 static int
491 xen_ia64_privcmd_entry_mmap(struct vm_area_struct* vma,
492 unsigned long addr,
493 struct xen_ia64_privcmd_range* privcmd_range,
494 int i,
495 unsigned long mfn,
496 pgprot_t prot,
497 domid_t domid)
498 {
499 int error = 0;
500 struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
501 unsigned long gpfn;
502 unsigned long flags;
504 if ((addr & ~PAGE_MASK) != 0 || mfn == INVALID_MFN) {
505 error = -EINVAL;
506 goto out;
507 }
509 if (entry->gpfn != INVALID_GPFN) {
510 error = -EBUSY;
511 goto out;
512 }
513 gpfn = (privcmd_range->res->start >> PAGE_SHIFT) + i;
515 flags = ASSIGN_writable;
516 if (pgprot_val(prot) == PROT_READ) {
517 flags = ASSIGN_readonly;
518 }
519 error = HYPERVISOR_add_physmap(gpfn, mfn, flags, domid);
520 if (error != 0) {
521 goto out;
522 }
524 prot = vma->vm_page_prot;
525 error = remap_pfn_range(vma, addr, gpfn, 1 << PAGE_SHIFT, prot);
526 if (error != 0) {
527 error = HYPERVISOR_zap_physmap(gpfn, 0);
528 if (error) {
529 BUG();//XXX
530 }
531 } else {
532 atomic_inc(&entry->map_count);
533 entry->gpfn = gpfn;
534 }
536 out:
537 return error;
538 }
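/*
 * xen_ia64_privcmd_entry_mmap() assigns the foreign domain's mfn to a
 * gpfn inside the reserved resource region with HYPERVISOR_add_physmap,
 * then maps that gpfn into the user vma with remap_pfn_range(); if the
 * remap fails, the physmap entry is zapped again.
 */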
540 static void
541 xen_ia64_privcmd_entry_munmap(struct xen_ia64_privcmd_range* privcmd_range,
542 int i)
543 {
544 struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
545 unsigned long gpfn = entry->gpfn;
546 //gpfn = (privcmd_range->res->start >> PAGE_SHIFT) +
547 // (vma->vm_pgoff - privcmd_range->pgoff);
548 int error;
550 error = HYPERVISOR_zap_physmap(gpfn, 0);
551 if (error) {
552 BUG();//XXX
553 }
554 entry->gpfn = INVALID_GPFN;
555 }
557 static void
558 xen_ia64_privcmd_entry_open(struct xen_ia64_privcmd_range* privcmd_range,
559 int i)
560 {
561 struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
562 if (entry->gpfn != INVALID_GPFN) {
563 atomic_inc(&entry->map_count);
564 } else {
565 BUG_ON(atomic_read(&entry->map_count) != 0);
566 }
567 }
569 static void
570 xen_ia64_privcmd_entry_close(struct xen_ia64_privcmd_range* privcmd_range,
571 int i)
572 {
573 struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
574 if (entry->gpfn != INVALID_GPFN &&
575 atomic_dec_and_test(&entry->map_count)) {
576 xen_ia64_privcmd_entry_munmap(privcmd_range, i);
577 }
578 }
580 static void xen_ia64_privcmd_vma_open(struct vm_area_struct* vma);
581 static void xen_ia64_privcmd_vma_close(struct vm_area_struct* vma);
583 struct vm_operations_struct xen_ia64_privcmd_vm_ops = {
584 .open = &xen_ia64_privcmd_vma_open,
585 .close = &xen_ia64_privcmd_vma_close,
586 };
588 static void
589 __xen_ia64_privcmd_vma_open(struct vm_area_struct* vma,
590 struct xen_ia64_privcmd_vma* privcmd_vma,
591 struct xen_ia64_privcmd_range* privcmd_range)
592 {
593 unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
594 unsigned long num_entries = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
595 unsigned long i;
597 BUG_ON(entry_offset < 0);
598 BUG_ON(entry_offset + num_entries > privcmd_range->num_entries);
600 privcmd_vma->range = privcmd_range;
601 privcmd_vma->num_entries = num_entries;
602 privcmd_vma->entries = &privcmd_range->entries[entry_offset];
603 vma->vm_private_data = privcmd_vma;
604 for (i = 0; i < privcmd_vma->num_entries; i++) {
605 xen_ia64_privcmd_entry_open(privcmd_range, entry_offset + i);
606 }
608 vma->vm_private_data = privcmd_vma;
609 vma->vm_ops = &xen_ia64_privcmd_vm_ops;
610 }
612 static void
613 xen_ia64_privcmd_vma_open(struct vm_area_struct* vma)
614 {
615 struct xen_ia64_privcmd_vma* privcmd_vma = (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
616 struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
618 atomic_inc(&privcmd_range->ref_count);
619 // vm_op->open() can't fail.
620 privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL | __GFP_NOFAIL);
622 __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range);
623 }
625 static void
626 xen_ia64_privcmd_vma_close(struct vm_area_struct* vma)
627 {
628 struct xen_ia64_privcmd_vma* privcmd_vma =
629 (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
630 struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
631 unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
632 unsigned long i;
634 for (i = 0; i < privcmd_vma->num_entries; i++) {
635 xen_ia64_privcmd_entry_close(privcmd_range, entry_offset + i);
636 }
637 vma->vm_private_data = NULL;
638 kfree(privcmd_vma);
640 if (atomic_dec_and_test(&privcmd_range->ref_count)) {
641 #if 1
642 for (i = 0; i < privcmd_range->num_entries; i++) {
643 struct xen_ia64_privcmd_entry* entry =
644 &privcmd_range->entries[i];
645 BUG_ON(atomic_read(&entry->map_count) != 0);
646 BUG_ON(entry->gpfn != INVALID_GPFN);
647 }
648 #endif
649 release_resource(privcmd_range->res);
650 kfree(privcmd_range->res);
651 vfree(privcmd_range);
652 }
653 }
655 int
656 privcmd_mmap(struct file * file, struct vm_area_struct * vma)
657 {
658 int error;
659 unsigned long size = vma->vm_end - vma->vm_start;
660 unsigned long num_entries = size >> PAGE_SHIFT;
661 struct xen_ia64_privcmd_range* privcmd_range = NULL;
662 struct xen_ia64_privcmd_vma* privcmd_vma = NULL;
663 struct resource* res = NULL;
664 unsigned long i;
665 BUG_ON(!is_running_on_xen());
667 BUG_ON(file->private_data != NULL);
669 error = -ENOMEM;
670 privcmd_range =
671 vmalloc(sizeof(*privcmd_range) +
672 sizeof(privcmd_range->entries[0]) * num_entries);
673 if (privcmd_range == NULL) {
674 goto out_enomem0;
675 }
676 privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL);
677 if (privcmd_vma == NULL) {
678 goto out_enomem1;
679 }
680 res = kzalloc(sizeof(*res), GFP_KERNEL);
681 if (res == NULL) {
682 goto out_enomem1;
683 }
684 res->name = "Xen privcmd mmap";
685 error = allocate_resource(&iomem_resource, res, size,
686 privcmd_resource_min, privcmd_resource_max,
687 privcmd_resource_align, NULL, NULL);
688 if (error) {
689 goto out_enomem1;
690 }
691 privcmd_range->res = res;
693 /* DONTCOPY is essential for Xen as copy_page_range is broken. */
694 vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
696 atomic_set(&privcmd_range->ref_count, 1);
697 privcmd_range->pgoff = vma->vm_pgoff;
698 privcmd_range->num_entries = num_entries;
699 for (i = 0; i < privcmd_range->num_entries; i++) {
700 xen_ia64_privcmd_init_entry(&privcmd_range->entries[i]);
701 }
703 __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range);
704 return 0;
706 out_enomem1:
707 kfree(res);
708 kfree(privcmd_vma);
709 out_enomem0:
710 vfree(privcmd_range);
711 return error;
712 }
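/*
 * privcmd_mmap() allocates a fresh resource region (the gpfn space the
 * foreign pages will be assigned to) plus the bookkeeping structures
 * for the whole vma; the actual mappings are installed later, page by
 * page, through direct_remap_pfn_range() below.
 */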
714 int
715 direct_remap_pfn_range(struct vm_area_struct *vma,
716 unsigned long address, // process virtual address
717 unsigned long mfn, // mfn, mfn + 1, ... mfn + size/PAGE_SIZE
718 unsigned long size,
719 pgprot_t prot,
720 domid_t domid) // target domain
721 {
722 struct xen_ia64_privcmd_vma* privcmd_vma =
723 (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
724 struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
725 unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
727 unsigned long i;
728 unsigned long offset;
729 int error = 0;
730 BUG_ON(!is_running_on_xen());
732 #if 0
733 if (prot != vm->vm_page_prot) {
734 return -EINVAL;
735 }
736 #endif
738 i = (address - vma->vm_start) >> PAGE_SHIFT;
739 for (offset = 0; offset < size; offset += PAGE_SIZE) {
740 error = xen_ia64_privcmd_entry_mmap(vma, (address + offset) & PAGE_MASK, privcmd_range, entry_offset + i, mfn, prot, domid);
741 if (error != 0) {
742 break;
743 }
745 i++;
746 mfn++;
747 }
749 return error;
750 }
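/*
 * direct_remap_pfn_range() maps machine frames mfn, mfn+1, ... of
 * domain 'domid' into the caller's vma one page at a time, via the
 * per-entry mmap helper above, and stops at the first failure.
 */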
753 /* Called after suspend, to resume time. */
754 void
755 time_resume(void)
756 {
757 extern void ia64_cpu_local_tick(void);
759 /* Just trigger a tick. */
760 ia64_cpu_local_tick();
761 }
763 ///////////////////////////////////////////////////////////////////////////
764 // expose p2m table
765 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M
766 #include <linux/cpu.h>
767 #include <asm/uaccess.h>
769 int p2m_initialized __read_mostly = 0;
771 unsigned long p2m_min_low_pfn __read_mostly;
772 unsigned long p2m_max_low_pfn __read_mostly;
773 unsigned long p2m_convert_min_pfn __read_mostly;
774 unsigned long p2m_convert_max_pfn __read_mostly;
776 static struct resource p2m_resource = {
777 .name = "Xen p2m table",
778 .flags = IORESOURCE_MEM,
779 };
780 static unsigned long p2m_assign_start_pfn __read_mostly;
781 static unsigned long p2m_assign_end_pfn __read_mostly;
782 volatile const pte_t* p2m_pte __read_mostly;
784 #define GRANULE_PFN PTRS_PER_PTE
785 static unsigned long p2m_granule_pfn __read_mostly = GRANULE_PFN;
787 #define ROUNDDOWN(x, y) ((x) & ~((y) - 1))
788 #define ROUNDUP(x, y) (((x) + (y) - 1) & ~((y) - 1))
790 #define P2M_PREFIX "Xen p2m: "
792 static int xen_ia64_p2m_expose __read_mostly = 1;
793 module_param(xen_ia64_p2m_expose, int, 0);
794 MODULE_PARM_DESC(xen_ia64_p2m_expose,
795 "enable/disable xen/ia64 p2m exposure optimization\n");
797 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
798 static int xen_ia64_p2m_expose_use_dtr __read_mostly = 1;
799 module_param(xen_ia64_p2m_expose_use_dtr, int, 0);
800 MODULE_PARM_DESC(xen_ia64_p2m_expose_use_dtr,
801 "use/unuse dtr to map exposed p2m table\n");
803 static const int p2m_page_shifts[] = {
804 _PAGE_SIZE_4K,
805 _PAGE_SIZE_8K,
806 _PAGE_SIZE_16K,
807 _PAGE_SIZE_64K,
808 _PAGE_SIZE_256K,
809 _PAGE_SIZE_1M,
810 _PAGE_SIZE_4M,
811 _PAGE_SIZE_16M,
812 _PAGE_SIZE_64M,
813 _PAGE_SIZE_256M,
814 };
816 struct p2m_itr_arg {
817 unsigned long vaddr;
818 unsigned long pteval;
819 unsigned long log_page_size;
820 };
821 static struct p2m_itr_arg p2m_itr_arg __read_mostly;
823 // This should be in asm-ia64/kregs.h
824 #define IA64_TR_P2M_TABLE 3
826 static void
827 p2m_itr(void* info)
828 {
829 struct p2m_itr_arg* arg = (struct p2m_itr_arg*)info;
830 ia64_itr(0x2, IA64_TR_P2M_TABLE,
831 arg->vaddr, arg->pteval, arg->log_page_size);
832 ia64_srlz_d();
833 }
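/*
 * p2m_itr() pins the exposed p2m table by inserting a data translation
 * register (slot IA64_TR_P2M_TABLE) on the local cpu.  It is run on
 * every online cpu, and on cpus that come online later via the hotplug
 * notifier below, so reads of the table do not take TLB misses.
 */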
835 static int
836 p2m_expose_dtr_call(struct notifier_block *self,
837 unsigned long event, void* ptr)
838 {
839 unsigned int cpu = (unsigned int)(long)ptr;
840 if (event != CPU_ONLINE)
841 return 0;
842 if (p2m_initialized && xen_ia64_p2m_expose_use_dtr)
843 smp_call_function_single(cpu, &p2m_itr, &p2m_itr_arg, 1, 1);
844 return 0;
845 }
847 static struct notifier_block p2m_expose_dtr_hotplug_notifier = {
848 .notifier_call = p2m_expose_dtr_call,
849 .next = NULL,
850 .priority = 0
851 };
852 #endif
854 static int
855 p2m_expose_init(void)
856 {
857 unsigned long num_pfn;
858 unsigned long size = 0;
859 unsigned long p2m_size = 0;
860 unsigned long align = ~0UL;
861 int error = 0;
862 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
863 int i;
864 unsigned long page_size;
865 unsigned long log_page_size = 0;
866 #endif
868 if (!xen_ia64_p2m_expose)
869 return -ENOSYS;
870 if (p2m_initialized)
871 return 0;
873 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
874 error = register_cpu_notifier(&p2m_expose_dtr_hotplug_notifier);
875 if (error < 0)
876 return error;
877 #endif
879 lock_cpu_hotplug();
880 if (p2m_initialized)
881 goto out;
883 #ifdef CONFIG_DISCONTIGMEM
884 p2m_min_low_pfn = min_low_pfn;
885 p2m_max_low_pfn = max_low_pfn;
886 #else
887 p2m_min_low_pfn = 0;
888 p2m_max_low_pfn = max_pfn;
889 #endif
891 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
892 if (xen_ia64_p2m_expose_use_dtr) {
893 unsigned long granule_pfn = 0;
894 p2m_size = p2m_max_low_pfn - p2m_min_low_pfn;
895 for (i = 0;
896 i < sizeof(p2m_page_shifts)/sizeof(p2m_page_shifts[0]);
897 i++) {
898 log_page_size = p2m_page_shifts[i];
899 page_size = 1UL << log_page_size;
900 if (page_size < p2m_size)
901 continue;
903 granule_pfn = max(page_size >> PAGE_SHIFT,
904 p2m_granule_pfn);
905 p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn,
906 granule_pfn);
907 p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn,
908 granule_pfn);
909 num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn;
910 size = num_pfn << PAGE_SHIFT;
911 p2m_size = num_pfn / PTRS_PER_PTE;
912 p2m_size = ROUNDUP(p2m_size, granule_pfn << PAGE_SHIFT);
913 if (p2m_size == page_size)
914 break;
915 }
916 if (p2m_size != page_size) {
917 printk(KERN_ERR "p2m_size != page_size\n");
918 error = -EINVAL;
919 goto out;
920 }
921 align = max(privcmd_resource_align, granule_pfn << PAGE_SHIFT);
922 } else
923 #endif
924 {
925 BUG_ON(p2m_granule_pfn & (p2m_granule_pfn - 1));
926 p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn,
927 p2m_granule_pfn);
928 p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn, p2m_granule_pfn);
929 num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn;
930 size = num_pfn << PAGE_SHIFT;
931 p2m_size = num_pfn / PTRS_PER_PTE;
932 p2m_size = ROUNDUP(p2m_size, p2m_granule_pfn << PAGE_SHIFT);
933 align = max(privcmd_resource_align,
934 p2m_granule_pfn << PAGE_SHIFT);
935 }
937 // use privcmd region
938 error = allocate_resource(&iomem_resource, &p2m_resource, p2m_size,
939 privcmd_resource_min, privcmd_resource_max,
940 align, NULL, NULL);
941 if (error) {
942 printk(KERN_ERR P2M_PREFIX
943 "can't allocate region for p2m exposure "
944 "[0x%016lx, 0x%016lx) 0x%016lx\n",
945 p2m_convert_min_pfn, p2m_convert_max_pfn, p2m_size);
946 goto out;
947 }
949 p2m_assign_start_pfn = p2m_resource.start >> PAGE_SHIFT;
950 p2m_assign_end_pfn = p2m_resource.end >> PAGE_SHIFT;
952 error = HYPERVISOR_expose_p2m(p2m_convert_min_pfn,
953 p2m_assign_start_pfn,
954 size, p2m_granule_pfn);
955 if (error) {
956 printk(KERN_ERR P2M_PREFIX "failed expose p2m hypercall %d\n",
957 error);
958 printk(KERN_ERR P2M_PREFIX "conv 0x%016lx assign 0x%016lx "
959 "size 0x%016lx granule 0x%016lx\n",
960 p2m_convert_min_pfn, p2m_assign_start_pfn,
961 size, p2m_granule_pfn);
962 release_resource(&p2m_resource);
963 goto out;
964 }
965 p2m_pte = (volatile const pte_t*)pfn_to_kaddr(p2m_assign_start_pfn);
966 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
967 if (xen_ia64_p2m_expose_use_dtr) {
968 p2m_itr_arg.vaddr = (unsigned long)__va(p2m_assign_start_pfn
969 << PAGE_SHIFT);
970 p2m_itr_arg.pteval = pte_val(pfn_pte(p2m_assign_start_pfn,
971 PAGE_KERNEL));
972 p2m_itr_arg.log_page_size = log_page_size;
973 smp_mb();
974 smp_call_function(&p2m_itr, &p2m_itr_arg, 1, 1);
975 p2m_itr(&p2m_itr_arg);
976 }
977 #endif
978 smp_mb();
979 p2m_initialized = 1;
980 printk(P2M_PREFIX "assign p2m table of [0x%016lx, 0x%016lx)\n",
981 p2m_convert_min_pfn << PAGE_SHIFT,
982 p2m_convert_max_pfn << PAGE_SHIFT);
983 printk(P2M_PREFIX "to [0x%016lx, 0x%016lx) (%ld KBytes)\n",
984 p2m_assign_start_pfn << PAGE_SHIFT,
985 p2m_assign_end_pfn << PAGE_SHIFT,
986 p2m_size / 1024);
987 out:
988 unlock_cpu_hotplug();
989 return error;
990 }
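/*
 * Summary: p2m_expose_init() reserves a suitably aligned pseudo-
 * physical region out of the privcmd range, asks the hypervisor to map
 * the guest's p2m table there with HYPERVISOR_expose_p2m, optionally
 * pins it with a dtr, and records the result in p2m_pte so that
 * p2m_phystomach() below can translate gpfn -> mfn with a plain memory
 * read instead of a hypercall.
 */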
992 #ifdef notyet
993 void
994 p2m_expose_cleanup(void)
995 {
996 BUG_ON(!p2m_initialized);
997 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
998 unregister_cpu_notifier(&p2m_expose_dtr_hotplug_notifier);
999 #endif
1000 release_resource(&p2m_resource);
1001 }
1002 #endif
1004 //XXX inlinize?
1005 unsigned long
1006 p2m_phystomach(unsigned long gpfn)
1007 {
1008 volatile const pte_t* pte;
1009 unsigned long mfn;
1010 unsigned long pteval;
1012 if (!p2m_initialized ||
1013 gpfn < p2m_min_low_pfn || gpfn > p2m_max_low_pfn
1014 /* || !pfn_valid(gpfn) */)
1015 return INVALID_MFN;
1016 pte = p2m_pte + (gpfn - p2m_convert_min_pfn);
1018 mfn = INVALID_MFN;
1019 if (likely(__get_user(pteval, (unsigned long __user *)pte) == 0 &&
1020 pte_present(__pte(pteval)) &&
1021 pte_pfn(__pte(pteval)) != (INVALID_MFN >> PAGE_SHIFT)))
1022 mfn = (pteval & _PFN_MASK) >> PAGE_SHIFT;
1024 return mfn;
1025 }
1027 EXPORT_SYMBOL_GPL(p2m_initialized);
1028 EXPORT_SYMBOL_GPL(p2m_min_low_pfn);
1029 EXPORT_SYMBOL_GPL(p2m_max_low_pfn);
1030 EXPORT_SYMBOL_GPL(p2m_convert_min_pfn);
1031 EXPORT_SYMBOL_GPL(p2m_convert_max_pfn);
1032 EXPORT_SYMBOL_GPL(p2m_pte);
1033 EXPORT_SYMBOL_GPL(p2m_phystomach);
1034 #endif