ia64/linux-2.6.18-xen.hg

annotate arch/ia64/xen/hypervisor.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently, if the balloon driver is unable to increase the guest's
reservation, it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However, it is possible that ballooning has in fact failed due to
memory pressure in the host, and it is therefore desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up, causing temporary memory pressure while
things stabilise. You would not expect a well-behaved toolstack to ask
a domain to balloon to more than its allocation, nor would you expect
it to deliberately over-commit memory by setting balloon targets which
exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), then we may as well keep
those pages rather than returning them to Xen.
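
The change itself lands in the common balloon driver rather than in the
file annotated below; purely as an illustration of the behaviour the
message describes, a minimal sketch of the retry loop might look like
this (names such as increase_reservation, current_target, current_pages
and balloon_timer are illustrative, not the driver's actual identifiers):

    /* Illustrative sketch only -- not the actual balloon driver code. */
    static struct timer_list balloon_timer;    /* assumed set up elsewhere */

    static void balloon_process_sketch(void)
    {
        long credit = current_target() - current_pages;

        if (credit > 0)
            increase_reservation(credit);   /* keep pages even on partial success */
        if (credit < 0)
            decrease_reservation(-credit);

        /*
         * No "hard limit": if we fell short in either direction, keep
         * whatever we did get and retry on a timer, in case the host's
         * memory pressure was only temporary.
         */
        if (current_target() != current_pages)
            mod_timer(&balloon_timer, jiffies + HZ);
    }
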

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents e410857fd83c
children
rev   line source
ian@26 1 /******************************************************************************
ian@26 2 * arch/ia64/xen/hypervisor.c
ian@26 3 *
ian@26 4 * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
ian@26 5 * VA Linux Systems Japan K.K.
ian@26 6 *
ian@26 7 * This program is free software; you can redistribute it and/or modify
ian@26 8 * it under the terms of the GNU General Public License as published by
ian@26 9 * the Free Software Foundation; either version 2 of the License, or
ian@26 10 * (at your option) any later version.
ian@26 11 *
ian@26 12 * This program is distributed in the hope that it will be useful,
ian@26 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ian@26 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
ian@26 15 * GNU General Public License for more details.
ian@26 16 *
ian@26 17 * You should have received a copy of the GNU General Public License
ian@26 18 * along with this program; if not, write to the Free Software
ian@26 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
ian@26 20 *
ian@26 21 */
ian@26 22
ian@26 23 #include <linux/spinlock.h>
ian@26 24 #include <linux/bootmem.h>
ian@26 25 #include <linux/module.h>
ian@26 26 #include <linux/vmalloc.h>
ian@26 27 #include <linux/efi.h>
ian@26 28 #include <asm/page.h>
ian@26 29 #include <asm/pgalloc.h>
ian@26 30 #include <asm/meminit.h>
ian@26 31 #include <asm/hypervisor.h>
ian@26 32 #include <asm/hypercall.h>
ian@26 33 #include <xen/interface/memory.h>
ian@26 34 #include <xen/xencons.h>
ian@26 35 #include <xen/balloon.h>
ian@26 36
alex@392 37 shared_info_t *HYPERVISOR_shared_info __read_mostly =
alex@392 38 (shared_info_t *)XSI_BASE;
ian@26 39 EXPORT_SYMBOL(HYPERVISOR_shared_info);
ian@26 40
ian@26 41 start_info_t *xen_start_info;
ian@26 42 EXPORT_SYMBOL(xen_start_info);
ian@26 43
ian@26 44 EXPORT_SYMBOL(running_on_xen);
ian@26 45
ian@26 46 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M
ian@26 47 static int p2m_expose_init(void);
ian@26 48 #else
ian@26 49 #define p2m_expose_init() (-ENOSYS)
ian@26 50 #define p2m_expose_resume() ((void)0)
ian@26 51 #endif
ian@26 52
ian@26 53 EXPORT_SYMBOL(__hypercall);
ian@26 54
ian@26 55 void __init
ian@26 56 xen_setup(char **cmdline_p)
ian@26 57 {
keir@261 58 struct dom0_vga_console_info *info;
ian@26 59 extern void dig_setup(char **cmdline_p);
keir@261 60
ian@26 61 if (ia64_platform_is("xen"))
ian@26 62 dig_setup(cmdline_p);
alex@392 63
ian@26 64 if (!is_running_on_xen() || !is_initial_xendomain())
ian@26 65 return;
ian@26 66
keir@261 67 info = (void *)((char *)xen_start_info +
keir@261 68 xen_start_info->console.dom0.info_off);
keir@261 69 dom0_init_screen_info(info, xen_start_info->console.dom0.info_size);
keir@261 70
ian@26 71 xen_start_info->console.domU.mfn = 0;
ian@26 72 xen_start_info->console.domU.evtchn = 0;
ian@26 73 }
ian@26 74
ian@26 75 void __cpuinit
ian@26 76 xen_cpu_init(void)
ian@26 77 {
ian@26 78 extern void xen_smp_intr_init(void);
ian@26 79 xen_smp_intr_init();
ian@26 80 }
ian@26 81
alex@392 82 /*
alex@392 83 * __xen_create_contiguous_region(), __xen_destroy_contiguous_region()
alex@392 84 * are based on i386 xen_create_contiguous_region(),
alex@392 85 * xen_destroy_contiguous_region()
alex@392 86 */
ian@26 87
ian@26 88 /* Protected by balloon_lock. */
ian@26 89 #define MAX_CONTIG_ORDER 7
ian@26 90 static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
ian@26 91
ian@26 92 /* Ensure multi-page extents are contiguous in machine memory. */
ian@26 93 int
ian@26 94 __xen_create_contiguous_region(unsigned long vstart,
ian@26 95 unsigned int order, unsigned int address_bits)
ian@26 96 {
ian@26 97 unsigned long error = 0;
ian@26 98 unsigned long gphys = __pa(vstart);
ian@26 99 unsigned long start_gpfn = gphys >> PAGE_SHIFT;
ian@26 100 unsigned long num_gpfn = 1 << order;
ian@26 101 unsigned long i;
ian@26 102 unsigned long flags;
ian@26 103
ian@26 104 unsigned long *in_frames = discontig_frames, out_frame;
ian@26 105 int success;
ian@26 106 struct xen_memory_exchange exchange = {
ian@26 107 .in = {
ian@26 108 .nr_extents = num_gpfn,
ian@26 109 .extent_order = 0,
ian@26 110 .domid = DOMID_SELF
ian@26 111 },
ian@26 112 .out = {
ian@26 113 .nr_extents = 1,
ian@26 114 .extent_order = order,
ian@26 115 .address_bits = address_bits,
ian@26 116 .domid = DOMID_SELF
ian@26 117 },
ian@26 118 .nr_exchanged = 0
ian@26 119 };
ian@26 120
ian@26 121 if (unlikely(order > MAX_CONTIG_ORDER))
ian@26 122 return -ENOMEM;
ian@26 123
ian@26 124 set_xen_guest_handle(exchange.in.extent_start, in_frames);
ian@26 125 set_xen_guest_handle(exchange.out.extent_start, &out_frame);
ian@26 126
ian@26 127 scrub_pages(vstart, num_gpfn);
ian@26 128
ian@26 129 balloon_lock(flags);
ian@26 130
ian@26 131 /* Get a new contiguous memory extent. */
alex@392 132 for (i = 0; i < num_gpfn; i++)
ian@26 133 in_frames[i] = start_gpfn + i;
ian@26 134 out_frame = start_gpfn;
ian@26 135 error = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
ian@26 136 success = (exchange.nr_exchanged == num_gpfn);
ian@26 137 BUG_ON(!success && ((exchange.nr_exchanged != 0) || (error == 0)));
ian@26 138 BUG_ON(success && (error != 0));
ian@26 139 if (unlikely(error == -ENOSYS)) {
ian@26 140 /* Compatibility when XENMEM_exchange is unsupported. */
ian@26 141 error = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
ian@26 142 &exchange.in);
ian@26 143 BUG_ON(error != num_gpfn);
ian@26 144 error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
ian@26 145 &exchange.out);
ian@26 146 if (error != 1) {
ian@26 147 /* Couldn't get special memory: fall back to normal. */
ian@26 148 for (i = 0; i < num_gpfn; i++) {
ian@26 149 in_frames[i] = start_gpfn + i;
ian@26 150 }
ian@26 151 error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
ian@26 152 &exchange.in);
ian@26 153 BUG_ON(error != num_gpfn);
ian@26 154 success = 0;
ian@26 155 } else
ian@26 156 success = 1;
ian@26 157 }
ian@26 158 #if 0
ian@26 159 if (success) {
ian@26 160 unsigned long mfn;
ian@26 161 unsigned long mfn_prev = ~0UL;
ian@26 162 for (i = 0; i < num_gpfn; i++) {
ian@26 163 mfn = pfn_to_mfn_for_dma(start_gpfn + i);
ian@26 164 if (mfn_prev != ~0UL && mfn != mfn_prev + 1) {
ian@26 165 xprintk("\n");
ian@26 166 xprintk("%s:%d order %d "
ian@26 167 "start 0x%lx bus 0x%lx "
ian@26 168 "machine 0x%lx\n",
ian@26 169 __func__, __LINE__, order,
ian@26 170 vstart, virt_to_bus((void*)vstart),
ian@26 171 phys_to_machine_for_dma(gphys));
ian@26 172 xprintk("mfn: ");
ian@26 173 for (i = 0; i < num_gpfn; i++) {
ian@26 174 mfn = pfn_to_mfn_for_dma(
ian@26 175 start_gpfn + i);
ian@26 176 xprintk("0x%lx ", mfn);
ian@26 177 }
ian@26 178 xprintk("\n");
ian@26 179 break;
ian@26 180 }
ian@26 181 mfn_prev = mfn;
ian@26 182 }
ian@26 183 }
ian@26 184 #endif
ian@26 185 balloon_unlock(flags);
ian@26 186 return success? 0: -ENOMEM;
ian@26 187 }
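
A typical caller of the function above is a DMA path that needs memory
which is machine-contiguous and below a device's addressing limit. As a
hedged sketch under that assumption (the wrapper below is illustrative,
not an API from this tree):

    #include <linux/gfp.h>
    #include <asm/page.h>

    /* Illustrative: allocate 'size' bytes usable by a 32-bit DMA device. */
    static void *dma_contig_alloc_sketch(size_t size)
    {
        unsigned int order = get_order(size);
        unsigned long vaddr = __get_free_pages(GFP_KERNEL, order);

        if (vaddr == 0)
            return NULL;
        /* Exchange the backing frames for one machine-contiguous
         * extent that fits within 32 address bits. */
        if (__xen_create_contiguous_region(vaddr, order, 32) != 0) {
            free_pages(vaddr, order);
            return NULL;
        }
        return (void *)vaddr;
    }
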
ian@26 188
ian@26 189 void
ian@26 190 __xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
ian@26 191 {
ian@26 192 unsigned long flags;
ian@26 193 unsigned long error = 0;
ian@26 194 unsigned long start_gpfn = __pa(vstart) >> PAGE_SHIFT;
ian@26 195 unsigned long num_gpfn = 1UL << order;
ian@26 196 unsigned long i;
ian@26 197
ian@26 198 unsigned long *out_frames = discontig_frames, in_frame;
ian@26 199 int success;
ian@26 200 struct xen_memory_exchange exchange = {
ian@26 201 .in = {
ian@26 202 .nr_extents = 1,
ian@26 203 .extent_order = order,
ian@26 204 .domid = DOMID_SELF
ian@26 205 },
ian@26 206 .out = {
ian@26 207 .nr_extents = num_gpfn,
ian@26 208 .extent_order = 0,
ian@26 209 .address_bits = 0,
ian@26 210 .domid = DOMID_SELF
ian@26 211 },
ian@26 212 .nr_exchanged = 0
alex@392 213 };
ian@26 214
ian@26 215
ian@26 216 if (unlikely(order > MAX_CONTIG_ORDER))
ian@26 217 return;
ian@26 218
ian@26 219 set_xen_guest_handle(exchange.in.extent_start, &in_frame);
ian@26 220 set_xen_guest_handle(exchange.out.extent_start, out_frames);
ian@26 221
ian@26 222 scrub_pages(vstart, num_gpfn);
ian@26 223
ian@26 224 balloon_lock(flags);
ian@26 225
alex@392 226 /* Do the exchange for non-contiguous MFNs. */
ian@26 227 in_frame = start_gpfn;
alex@392 228 for (i = 0; i < num_gpfn; i++)
ian@26 229 out_frames[i] = start_gpfn + i;
ian@26 230 error = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
ian@26 231 success = (exchange.nr_exchanged == 1);
ian@26 232 BUG_ON(!success && ((exchange.nr_exchanged != 0) || (error == 0)));
ian@26 233 BUG_ON(success && (error != 0));
ian@26 234 if (unlikely(error == -ENOSYS)) {
alex@392 235 /* Compatibility when XENMEM_exchange is unsupported. */
ian@26 236 error = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
ian@26 237 &exchange.in);
ian@26 238 BUG_ON(error != 1);
ian@26 239
ian@26 240 error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
ian@26 241 &exchange.out);
ian@26 242 BUG_ON(error != num_gpfn);
ian@26 243 }
ian@26 244 balloon_unlock(flags);
ian@26 245 }
ian@26 246
alex@225 247 int
alex@225 248 xen_limit_pages_to_max_mfn(struct page *pages, unsigned int order,
alex@225 249 unsigned int address_bits)
alex@225 250 {
alex@225 251 return xen_create_contiguous_region((unsigned long)page_address(pages),
alex@225 252 order, address_bits);
alex@225 253 }
alex@225 254
alex@392 255 /****************************************************************************
alex@392 256 * grant table hack
alex@392 257 * cmd: GNTTABOP_xxx
alex@392 258 */
ian@26 259 #include <linux/mm.h>
ian@26 260 #include <xen/interface/xen.h>
ian@26 261 #include <xen/gnttab.h>
ian@26 262
kfraser@106 263 void *arch_gnttab_alloc_shared(unsigned long *frames)
kfraser@106 264 {
kfraser@106 265 return __va(frames[0] << PAGE_SHIFT);
kfraser@106 266 }
kfraser@106 267
ian@26 268 static void
ian@26 269 gnttab_map_grant_ref_pre(struct gnttab_map_grant_ref *uop)
ian@26 270 {
ian@26 271 uint32_t flags;
ian@26 272
ian@26 273 flags = uop->flags;
ian@26 274
ian@26 275 if (flags & GNTMAP_host_map) {
ian@26 276 if (flags & GNTMAP_application_map) {
alex@392 277 xprintd("GNTMAP_application_map is not supported yet:"
alex@392 278 " flags 0x%x\n", flags);
ian@26 279 BUG();
ian@26 280 }
ian@26 281 if (flags & GNTMAP_contains_pte) {
alex@392 282 xprintd("GNTMAP_contains_pte is not supported yet"
alex@392 283 " flags 0x%x\n", flags);
ian@26 284 BUG();
ian@26 285 }
ian@26 286 } else if (flags & GNTMAP_device_map) {
alex@392 287 xprintd("GNTMAP_device_map is not supported yet 0x%x\n",
alex@392 288 flags);
alex@392 289 BUG(); /* XXX not yet. actually this flag is not used. */
ian@26 290 } else {
ian@26 291 BUG();
ian@26 292 }
ian@26 293 }
ian@26 294
ian@26 295 int
ian@26 296 HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
ian@26 297 {
ian@26 298 if (cmd == GNTTABOP_map_grant_ref) {
ian@26 299 unsigned int i;
ian@26 300 for (i = 0; i < count; i++) {
ian@26 301 gnttab_map_grant_ref_pre(
ian@26 302 (struct gnttab_map_grant_ref*)uop + i);
ian@26 303 }
ian@26 304 }
alex@187 305 return xencomm_hypercall_grant_table_op(cmd, uop, count);
ian@26 306 }
ian@26 307 EXPORT_SYMBOL(HYPERVISOR_grant_table_op);
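
Since only GNTMAP_host_map survives the pre-check above (the other modes
BUG()), a caller mapping a page granted by another domain would go
through this wrapper roughly as follows; a hedged sketch assuming the
gnttab_set_map_op() helper from xen/gnttab.h, with the grant reference
and remote domid as placeholders:

    /* Illustrative sketch: map one foreign grant into host_addr. */
    static int map_foreign_grant_sketch(unsigned long host_addr,
                                        grant_ref_t ref, domid_t otherend)
    {
        struct gnttab_map_grant_ref op;

        gnttab_set_map_op(&op, host_addr, GNTMAP_host_map, ref, otherend);
        if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
            BUG();
        return op.status;   /* GNTST_okay (0) on success */
    }
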
ian@26 308
alex@392 309 /**************************************************************************
alex@392 310 * foreign mapping
alex@392 311 */
ian@26 312 #include <linux/efi.h>
alex@392 313 #include <asm/meminit.h> /* for IA64_GRANULE_SIZE, GRANULEROUND{UP,DOWN}() */
ian@26 314
ian@26 315 static unsigned long privcmd_resource_min = 0;
alex@392 316 /* Xen/ia64 currently can handle pseudo physical address bits up to
alex@392 317 * (PAGE_SHIFT * 3) */
alex@392 318 static unsigned long privcmd_resource_max =
alex@392 319 GRANULEROUNDDOWN((1UL << (PAGE_SHIFT * 3)) - 1);
ian@26 320 static unsigned long privcmd_resource_align = IA64_GRANULE_SIZE;
ian@26 321
ian@26 322 static unsigned long
ian@26 323 md_end_addr(const efi_memory_desc_t *md)
ian@26 324 {
ian@26 325 return md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
ian@26 326 }
ian@26 327
ian@26 328 #define XEN_IA64_PRIVCMD_LEAST_GAP_SIZE (1024 * 1024 * 1024UL)
ian@26 329 static int
ian@26 330 xen_ia64_privcmd_check_size(unsigned long start, unsigned long end)
ian@26 331 {
ian@26 332 return (start < end &&
ian@26 333 (end - start) > XEN_IA64_PRIVCMD_LEAST_GAP_SIZE);
ian@26 334 }
ian@26 335
ian@26 336 static int __init
ian@26 337 xen_ia64_privcmd_init(void)
ian@26 338 {
ian@26 339 void *efi_map_start, *efi_map_end, *p;
ian@26 340 u64 efi_desc_size;
ian@26 341 efi_memory_desc_t *md;
ian@26 342 unsigned long tmp_min;
ian@26 343 unsigned long tmp_max;
ian@26 344 unsigned long gap_size;
ian@26 345 unsigned long prev_end;
ian@26 346
ian@26 347 if (!is_running_on_xen())
ian@26 348 return -1;
ian@26 349
ian@26 350 efi_map_start = __va(ia64_boot_param->efi_memmap);
ian@26 351 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
ian@26 352 efi_desc_size = ia64_boot_param->efi_memdesc_size;
ian@26 353
alex@392 354 /* at first check the used highest address */
ian@26 355 for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
alex@392 356 /* nothing */;
ian@26 357 }
ian@26 358 md = p - efi_desc_size;
ian@26 359 privcmd_resource_min = GRANULEROUNDUP(md_end_addr(md));
ian@26 360 if (xen_ia64_privcmd_check_size(privcmd_resource_min,
alex@392 361 privcmd_resource_max))
ian@26 362 goto out;
ian@26 363
alex@392 364 /* The highest used address is too large;
alex@392 365 * try to find the largest gap. */
ian@26 366 tmp_min = privcmd_resource_max;
ian@26 367 tmp_max = 0;
ian@26 368 gap_size = 0;
ian@26 369 prev_end = 0;
ian@26 370 for (p = efi_map_start;
ian@26 371 p < efi_map_end - efi_desc_size;
ian@26 372 p += efi_desc_size) {
ian@26 373 unsigned long end;
ian@26 374 efi_memory_desc_t* next;
ian@26 375 unsigned long next_start;
ian@26 376
ian@26 377 md = p;
ian@26 378 end = md_end_addr(md);
alex@392 379 if (end > privcmd_resource_max)
ian@26 380 break;
ian@26 381 if (end < prev_end) {
alex@392 382 /* Work around:
alex@392 383 * Xen may pass incompletely sorted memory
alex@392 384 * descriptors like
alex@392 385 * [x, x + length]
alex@392 386 * [x, x]
alex@392 387 * although this order should be reversed. */
ian@26 388 continue;
ian@26 389 }
ian@26 390 next = p + efi_desc_size;
ian@26 391 next_start = next->phys_addr;
alex@392 392 if (next_start > privcmd_resource_max)
ian@26 393 next_start = privcmd_resource_max;
ian@26 394 if (end < next_start && gap_size < (next_start - end)) {
ian@26 395 tmp_min = end;
ian@26 396 tmp_max = next_start;
ian@26 397 gap_size = tmp_max - tmp_min;
ian@26 398 }
ian@26 399 prev_end = end;
ian@26 400 }
ian@26 401
ian@26 402 privcmd_resource_min = GRANULEROUNDUP(tmp_min);
ian@26 403 if (xen_ia64_privcmd_check_size(privcmd_resource_min, tmp_max)) {
ian@26 404 privcmd_resource_max = tmp_max;
ian@26 405 goto out;
ian@26 406 }
ian@26 407
ian@26 408 privcmd_resource_min = tmp_min;
ian@26 409 privcmd_resource_max = tmp_max;
ian@26 410 if (!xen_ia64_privcmd_check_size(privcmd_resource_min,
ian@26 411 privcmd_resource_max)) {
alex@392 412 /* No large enough gap was found.
alex@392 413 * Go ahead anyway, with a warning, hoping that a
alex@392 414 * large region won't be requested. */
alex@392 415 printk(KERN_WARNING "xen privcmd: "
alex@392 416 "large enough region for privcmd mmap is not found.\n");
ian@26 417 }
ian@26 418
ian@26 419 out:
alex@392 420 printk(KERN_INFO "xen privcmd uses pseudo physical addr range "
alex@392 421 "[0x%lx, 0x%lx] (%ldMB)\n",
ian@26 422 privcmd_resource_min, privcmd_resource_max,
ian@26 423 (privcmd_resource_max - privcmd_resource_min) >> 20);
ian@26 424 BUG_ON(privcmd_resource_min >= privcmd_resource_max);
ian@26 425
alex@392 426 /* XXX this should be somewhere appropriate */
ian@26 427 (void)p2m_expose_init();
ian@26 428
ian@26 429 return 0;
ian@26 430 }
ian@26 431 late_initcall(xen_ia64_privcmd_init);
ian@26 432
ian@26 433 struct xen_ia64_privcmd_entry {
ian@26 434 atomic_t map_count;
ian@26 435 #define INVALID_GPFN (~0UL)
ian@26 436 unsigned long gpfn;
ian@26 437 };
ian@26 438
ian@26 439 struct xen_ia64_privcmd_range {
ian@26 440 atomic_t ref_count;
alex@392 441 unsigned long pgoff; /* in PAGE_SIZE */
alex@392 442 struct resource *res;
ian@26 443
alex@392 444 /* for foreign domain p2m mapping */
alex@392 445 void *private;
alex@392 446 void (*callback)(struct xen_ia64_privcmd_range *range, void *arg);
alex@197 447
ian@26 448 unsigned long num_entries;
ian@26 449 struct xen_ia64_privcmd_entry entries[0];
ian@26 450 };
ian@26 451
ian@26 452 struct xen_ia64_privcmd_vma {
ian@26 453 int is_privcmd_mmapped;
alex@392 454 struct xen_ia64_privcmd_range *range;
ian@26 455
ian@26 456 unsigned long num_entries;
alex@392 457 struct xen_ia64_privcmd_entry *entries;
ian@26 458 };
ian@26 459
ian@26 460 static void
alex@392 461 xen_ia64_privcmd_init_entry(struct xen_ia64_privcmd_entry *entry)
ian@26 462 {
ian@26 463 atomic_set(&entry->map_count, 0);
ian@26 464 entry->gpfn = INVALID_GPFN;
ian@26 465 }
ian@26 466
ian@26 467 static int
alex@392 468 xen_ia64_privcmd_entry_mmap(struct vm_area_struct *vma,
ian@26 469 unsigned long addr,
alex@392 470 struct xen_ia64_privcmd_range *privcmd_range,
ian@26 471 int i,
ian@26 472 unsigned long gmfn,
ian@26 473 pgprot_t prot,
ian@26 474 domid_t domid)
ian@26 475 {
ian@26 476 int error = 0;
alex@392 477 struct xen_ia64_privcmd_entry *entry = &privcmd_range->entries[i];
ian@26 478 unsigned long gpfn;
ian@26 479 unsigned long flags;
ian@26 480
ian@26 481 if ((addr & ~PAGE_MASK) != 0 || gmfn == INVALID_MFN) {
ian@26 482 error = -EINVAL;
ian@26 483 goto out;
ian@26 484 }
ian@26 485
ian@26 486 if (entry->gpfn != INVALID_GPFN) {
ian@26 487 error = -EBUSY;
ian@26 488 goto out;
ian@26 489 }
ian@26 490 gpfn = (privcmd_range->res->start >> PAGE_SHIFT) + i;
ian@26 491
ian@26 492 flags = ASSIGN_writable;
alex@392 493 if (pgprot_val(prot) == PROT_READ)
ian@26 494 flags = ASSIGN_readonly;
ian@26 495 error = HYPERVISOR_add_physmap_with_gmfn(gpfn, gmfn, flags, domid);
alex@392 496 if (error != 0)
ian@26 497 goto out;
ian@26 498
ian@26 499 prot = vma->vm_page_prot;
ian@26 500 error = remap_pfn_range(vma, addr, gpfn, 1 << PAGE_SHIFT, prot);
alex@438 501 /*
alex@438 502 * VM_PFNMAP is set in remap_pfn_range().
alex@438 503 * Reset the flag to avoid BUG_ON() in do_no_page().
alex@438 504 */
alex@438 505 vma->vm_flags &= ~VM_PFNMAP;
alex@438 506
ian@26 507 if (error != 0) {
ian@26 508 error = HYPERVISOR_zap_physmap(gpfn, 0);
alex@392 509 if (error)
alex@392 510 BUG(); /* XXX */
ian@26 511 } else {
ian@26 512 atomic_inc(&entry->map_count);
ian@26 513 entry->gpfn = gpfn;
ian@26 514 }
ian@26 515
ian@26 516 out:
ian@26 517 return error;
ian@26 518 }
ian@26 519
ian@26 520 static void
alex@392 521 xen_ia64_privcmd_entry_munmap(struct xen_ia64_privcmd_range *privcmd_range,
ian@26 522 int i)
ian@26 523 {
alex@392 524 struct xen_ia64_privcmd_entry *entry = &privcmd_range->entries[i];
ian@26 525 unsigned long gpfn = entry->gpfn;
alex@392 526 /* gpfn = (privcmd_range->res->start >> PAGE_SHIFT) +
alex@392 527 (vma->vm_pgoff - privcmd_range->pgoff); */
ian@26 528 int error;
ian@26 529
ian@26 530 error = HYPERVISOR_zap_physmap(gpfn, 0);
alex@392 531 if (error)
alex@392 532 BUG(); /* XXX */
ian@26 533 entry->gpfn = INVALID_GPFN;
ian@26 534 }
ian@26 535
ian@26 536 static void
alex@392 537 xen_ia64_privcmd_entry_open(struct xen_ia64_privcmd_range *privcmd_range,
ian@26 538 int i)
ian@26 539 {
alex@392 540 struct xen_ia64_privcmd_entry *entry = &privcmd_range->entries[i];
alex@392 541 if (entry->gpfn != INVALID_GPFN)
ian@26 542 atomic_inc(&entry->map_count);
alex@392 543 else
ian@26 544 BUG_ON(atomic_read(&entry->map_count) != 0);
ian@26 545 }
ian@26 546
ian@26 547 static void
alex@392 548 xen_ia64_privcmd_entry_close(struct xen_ia64_privcmd_range *privcmd_range,
ian@26 549 int i)
ian@26 550 {
alex@392 551 struct xen_ia64_privcmd_entry *entry = &privcmd_range->entries[i];
ian@26 552 if (entry->gpfn != INVALID_GPFN &&
alex@392 553 atomic_dec_and_test(&entry->map_count))
ian@26 554 xen_ia64_privcmd_entry_munmap(privcmd_range, i);
ian@26 555 }
ian@26 556
alex@392 557 static void xen_ia64_privcmd_vma_open(struct vm_area_struct *vma);
alex@392 558 static void xen_ia64_privcmd_vma_close(struct vm_area_struct *vma);
ian@26 559
alex@438 560 static struct page *
alex@438 561 xen_ia64_privcmd_vma_nopage(struct vm_area_struct *vma,
alex@438 562 unsigned long address,
alex@438 563 int *type)
alex@438 564 {
alex@438 565 return NOPAGE_SIGBUS;
alex@438 566 }
alex@438 567
ian@26 568 struct vm_operations_struct xen_ia64_privcmd_vm_ops = {
alex@438 569 .open = xen_ia64_privcmd_vma_open,
alex@438 570 .close = xen_ia64_privcmd_vma_close,
alex@438 571 .nopage = xen_ia64_privcmd_vma_nopage
ian@26 572 };
ian@26 573
ian@26 574 static void
alex@392 575 __xen_ia64_privcmd_vma_open(struct vm_area_struct *vma,
alex@392 576 struct xen_ia64_privcmd_vma *privcmd_vma,
alex@392 577 struct xen_ia64_privcmd_range *privcmd_range)
ian@26 578 {
ian@26 579 unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
alex@392 580 unsigned long num_entries =
alex@392 581 (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
ian@26 582 unsigned long i;
ian@26 583
ian@26 584 BUG_ON(vma->vm_pgoff < privcmd_range->pgoff); /* entry_offset would underflow */
ian@26 585 BUG_ON(entry_offset + num_entries > privcmd_range->num_entries);
ian@26 586
ian@26 587 privcmd_vma->range = privcmd_range;
ian@26 588 privcmd_vma->num_entries = num_entries;
ian@26 589 privcmd_vma->entries = &privcmd_range->entries[entry_offset];
ian@26 590 vma->vm_private_data = privcmd_vma;
alex@392 591 for (i = 0; i < privcmd_vma->num_entries; i++)
ian@26 592 xen_ia64_privcmd_entry_open(privcmd_range, entry_offset + i);
ian@26 593
ian@26 594 vma->vm_private_data = privcmd_vma;
ian@26 595 vma->vm_ops = &xen_ia64_privcmd_vm_ops;
ian@26 596 }
ian@26 597
ian@26 598 static void
alex@392 599 xen_ia64_privcmd_vma_open(struct vm_area_struct *vma)
ian@26 600 {
alex@392 601 struct xen_ia64_privcmd_vma *old_privcmd_vma =
alex@392 602 (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
alex@392 603 struct xen_ia64_privcmd_vma *privcmd_vma =
alex@392 604 (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
alex@392 605 struct xen_ia64_privcmd_range *privcmd_range = privcmd_vma->range;
ian@26 606
ian@26 607 atomic_inc(&privcmd_range->ref_count);
alex@392 608 /* vm_op->open() can't fail. */
ian@26 609 privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL | __GFP_NOFAIL);
alex@392 610 /* copy original value if necessary */
ian@26 611 privcmd_vma->is_privcmd_mmapped = old_privcmd_vma->is_privcmd_mmapped;
ian@26 612
ian@26 613 __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range);
ian@26 614 }
ian@26 615
ian@26 616 static void
alex@392 617 xen_ia64_privcmd_vma_close(struct vm_area_struct *vma)
ian@26 618 {
alex@392 619 struct xen_ia64_privcmd_vma *privcmd_vma =
ian@26 620 (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
alex@392 621 struct xen_ia64_privcmd_range *privcmd_range = privcmd_vma->range;
ian@26 622 unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
ian@26 623 unsigned long i;
ian@26 624
ian@26 625 for (i = 0; i < privcmd_vma->num_entries; i++) {
ian@26 626 xen_ia64_privcmd_entry_close(privcmd_range, entry_offset + i);
alex@277 627 cond_resched();
ian@26 628 }
ian@26 629 vma->vm_private_data = NULL;
ian@26 630 kfree(privcmd_vma);
ian@26 631
ian@26 632 if (atomic_dec_and_test(&privcmd_range->ref_count)) {
ian@26 633 #if 1
ian@26 634 for (i = 0; i < privcmd_range->num_entries; i++) {
alex@392 635 struct xen_ia64_privcmd_entry *entry =
ian@26 636 &privcmd_range->entries[i];
ian@26 637 BUG_ON(atomic_read(&entry->map_count) != 0);
ian@26 638 BUG_ON(entry->gpfn != INVALID_GPFN);
ian@26 639 }
ian@26 640 #endif
alex@197 641 if (privcmd_range->callback)
alex@197 642 (*privcmd_range->callback)(privcmd_range,
alex@197 643 privcmd_range->private);
ian@26 644 release_resource(privcmd_range->res);
ian@26 645 kfree(privcmd_range->res);
ian@26 646 vfree(privcmd_range);
ian@26 647 }
ian@26 648 }
ian@26 649
ian@26 650 int
ian@26 651 privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
ian@26 652 {
alex@392 653 struct xen_ia64_privcmd_vma *privcmd_vma =
ian@26 654 (struct xen_ia64_privcmd_vma *)vma->vm_private_data;
ian@26 655 return (xchg(&privcmd_vma->is_privcmd_mmapped, 1) == 0);
ian@26 656 }
ian@26 657
ian@26 658 int
ian@26 659 privcmd_mmap(struct file * file, struct vm_area_struct * vma)
ian@26 660 {
ian@26 661 int error;
ian@26 662 unsigned long size = vma->vm_end - vma->vm_start;
ian@26 663 unsigned long num_entries = size >> PAGE_SHIFT;
alex@392 664 struct xen_ia64_privcmd_range *privcmd_range = NULL;
alex@392 665 struct xen_ia64_privcmd_vma *privcmd_vma = NULL;
alex@392 666 struct resource *res = NULL;
ian@26 667 unsigned long i;
ian@26 668 BUG_ON(!is_running_on_xen());
ian@26 669
ian@26 670 BUG_ON(file->private_data != NULL);
ian@26 671
ian@26 672 error = -ENOMEM;
ian@26 673 privcmd_range =
ian@26 674 vmalloc(sizeof(*privcmd_range) +
ian@26 675 sizeof(privcmd_range->entries[0]) * num_entries);
alex@392 676 if (privcmd_range == NULL)
ian@26 677 goto out_enomem0;
ian@26 678 privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL);
alex@392 679 if (privcmd_vma == NULL)
ian@26 680 goto out_enomem1;
ian@26 681 privcmd_vma->is_privcmd_mmapped = 0;
ian@26 682
ian@26 683 res = kzalloc(sizeof(*res), GFP_KERNEL);
alex@392 684 if (res == NULL)
ian@26 685 goto out_enomem1;
ian@26 686 res->name = "Xen privcmd mmap";
ian@26 687 error = allocate_resource(&iomem_resource, res, size,
ian@26 688 privcmd_resource_min, privcmd_resource_max,
ian@26 689 privcmd_resource_align, NULL, NULL);
alex@392 690 if (error)
ian@26 691 goto out_enomem1;
ian@26 692 privcmd_range->res = res;
ian@26 693
ian@26 694 /* DONTCOPY is essential for Xen as copy_page_range is broken. */
alex@438 695 vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
ian@26 696
ian@26 697 atomic_set(&privcmd_range->ref_count, 1);
ian@26 698 privcmd_range->pgoff = vma->vm_pgoff;
ian@26 699 privcmd_range->num_entries = num_entries;
alex@197 700 privcmd_range->private = NULL;
alex@197 701 privcmd_range->callback = NULL;
alex@392 702 for (i = 0; i < privcmd_range->num_entries; i++)
ian@26 703 xen_ia64_privcmd_init_entry(&privcmd_range->entries[i]);
ian@26 704
ian@26 705 __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range);
ian@26 706 return 0;
ian@26 707
ian@26 708 out_enomem1:
ian@26 709 kfree(res);
ian@26 710 kfree(privcmd_vma);
ian@26 711 out_enomem0:
ian@26 712 vfree(privcmd_range);
ian@26 713 return error;
ian@26 714 }
ian@26 715
ian@26 716 int
ian@26 717 direct_remap_pfn_range(struct vm_area_struct *vma,
alex@392 718 unsigned long address, /* process virtual address */
alex@392 719 unsigned long gmfn, /* gmfn, gmfn + 1, ... gmfn + size/PAGE_SIZE */
ian@26 720 unsigned long size,
ian@26 721 pgprot_t prot,
alex@392 722 domid_t domid) /* target domain */
ian@26 723 {
alex@392 724 struct xen_ia64_privcmd_vma *privcmd_vma =
ian@26 725 (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
alex@392 726 struct xen_ia64_privcmd_range *privcmd_range = privcmd_vma->range;
ian@26 727 unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
ian@26 728
ian@26 729 unsigned long i;
ian@26 730 unsigned long offset;
ian@26 731 int error = 0;
ian@26 732 BUG_ON(!is_running_on_xen());
ian@26 733
ian@26 734 #if 0
alex@392 735 if (prot != vm->vm_page_prot)
ian@26 736 return -EINVAL;
ian@26 737 #endif
ian@26 738
ian@26 739 i = (address - vma->vm_start) >> PAGE_SHIFT;
ian@26 740 for (offset = 0; offset < size; offset += PAGE_SIZE) {
ian@26 741 error = xen_ia64_privcmd_entry_mmap(vma, (address + offset) & PAGE_MASK, privcmd_range, entry_offset + i, gmfn, prot, domid);
alex@392 742 if (error != 0)
ian@26 743 break;
ian@26 744
ian@26 745 i++;
ian@26 746 gmfn++;
alex@392 747 }
ian@26 748
ian@26 749 return error;
ian@26 750 }
ian@26 751
ian@26 752
alex@392 753 /**************************************************************************
alex@392 754 * expose p2m table
alex@392 755 */
ian@26 756 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M
ian@26 757 #include <linux/cpu.h>
ian@26 758 #include <asm/uaccess.h>
ian@26 759
ian@26 760 int p2m_initialized __read_mostly = 0;
ian@26 761
ian@26 762 unsigned long p2m_min_low_pfn __read_mostly;
ian@26 763 unsigned long p2m_max_low_pfn __read_mostly;
ian@26 764 unsigned long p2m_convert_min_pfn __read_mostly;
ian@26 765 unsigned long p2m_convert_max_pfn __read_mostly;
ian@26 766
ian@26 767 static struct resource p2m_resource = {
ian@26 768 .name = "Xen p2m table",
ian@26 769 .flags = IORESOURCE_MEM,
ian@26 770 };
ian@26 771 static unsigned long p2m_assign_start_pfn __read_mostly;
ian@26 772 static unsigned long p2m_assign_end_pfn __read_mostly;
alex@392 773 static unsigned long p2m_expose_size; /* this is referenced only at
alex@392 774 * resume time, so __read_mostly
alex@392 775 * doesn't make sense. */
alex@392 776 volatile const pte_t *p2m_pte __read_mostly;
ian@26 777
alex@87 778 #define GRANULE_PFN PTRS_PER_PTE
alex@87 779 static unsigned long p2m_granule_pfn __read_mostly = GRANULE_PFN;
ian@26 780
ian@26 781 #define ROUNDDOWN(x, y) ((x) & ~((y) - 1))
ian@26 782 #define ROUNDUP(x, y) (((x) + (y) - 1) & ~((y) - 1))
ian@26 783
ian@26 784 #define P2M_PREFIX "Xen p2m: "
ian@26 785
ian@26 786 static int xen_ia64_p2m_expose __read_mostly = 1;
ian@26 787 module_param(xen_ia64_p2m_expose, int, 0);
ian@26 788 MODULE_PARM_DESC(xen_ia64_p2m_expose,
alex@392 789 "enable/disable xen/ia64 p2m exposure optimization\n");
ian@26 790
ian@26 791 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
ian@26 792 static int xen_ia64_p2m_expose_use_dtr __read_mostly = 1;
ian@26 793 module_param(xen_ia64_p2m_expose_use_dtr, int, 0);
ian@26 794 MODULE_PARM_DESC(xen_ia64_p2m_expose_use_dtr,
alex@392 795 "use/unuse dtr to map exposed p2m table\n");
ian@26 796
ian@26 797 static const int p2m_page_shifts[] = {
ian@26 798 _PAGE_SIZE_4K,
ian@26 799 _PAGE_SIZE_8K,
ian@26 800 _PAGE_SIZE_16K,
ian@26 801 _PAGE_SIZE_64K,
ian@26 802 _PAGE_SIZE_256K,
ian@26 803 _PAGE_SIZE_1M,
ian@26 804 _PAGE_SIZE_4M,
ian@26 805 _PAGE_SIZE_16M,
ian@26 806 _PAGE_SIZE_64M,
ian@26 807 _PAGE_SIZE_256M,
ian@26 808 };
ian@26 809
ian@26 810 struct p2m_itr_arg {
ian@26 811 unsigned long vaddr;
ian@26 812 unsigned long pteval;
ian@26 813 unsigned long log_page_size;
ian@26 814 };
ian@26 815 static struct p2m_itr_arg p2m_itr_arg __read_mostly;
ian@26 816
alex@392 817 /* This should be in asm-ia64/kregs.h */
ian@26 818 #define IA64_TR_P2M_TABLE 3
ian@26 819
ian@26 820 static void
alex@392 821 p2m_itr(void *info)
ian@26 822 {
alex@392 823 struct p2m_itr_arg *arg = (struct p2m_itr_arg*)info;
ian@26 824 ia64_itr(0x2, IA64_TR_P2M_TABLE,
alex@392 825 arg->vaddr, arg->pteval, arg->log_page_size);
ian@26 826 ia64_srlz_d();
ian@26 827 }
ian@26 828
ian@26 829 static int
ian@26 830 p2m_expose_dtr_call(struct notifier_block *self,
alex@392 831 unsigned long event, void *ptr)
ian@26 832 {
ian@26 833 unsigned int cpu = (unsigned int)(long)ptr;
ian@26 834 if (event != CPU_ONLINE)
ian@26 835 return 0;
ian@26 836 if (p2m_initialized && xen_ia64_p2m_expose_use_dtr) {
ian@26 837 unsigned int me = get_cpu();
ian@26 838 if (cpu == me)
ian@26 839 p2m_itr(&p2m_itr_arg);
ian@26 840 else
ian@26 841 smp_call_function_single(cpu, &p2m_itr, &p2m_itr_arg,
ian@26 842 1, 1);
ian@26 843 put_cpu();
ian@26 844 }
ian@26 845 return 0;
ian@26 846 }
ian@26 847
ian@26 848 static struct notifier_block p2m_expose_dtr_hotplug_notifier = {
ian@26 849 .notifier_call = p2m_expose_dtr_call,
ian@26 850 .next = NULL,
ian@26 851 .priority = 0
ian@26 852 };
ian@26 853 #endif
ian@26 854
alex@197 855 static inline unsigned long
alex@197 856 p2m_table_size(unsigned long num_pfn)
alex@197 857 {
alex@197 858 return ((num_pfn + PTRS_PER_PTE - 1) / PTRS_PER_PTE) << PAGE_SHIFT;
alex@197 859 }
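
In words: one 8-byte PTE per pfn, rounded up to whole pages. A worked
example, assuming the common ia64 configuration of 16KB pages
(PAGE_SHIFT = 14, so PTRS_PER_PTE = PAGE_SIZE / sizeof(pte_t) = 2048):

    num_pfn for 4GB of memory = 4GB / 16KB             = 262144
    pages of PTEs             = (262144 + 2047) / 2048 = 128
    p2m_table_size            = 128 << PAGE_SHIFT      = 2MB
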
alex@197 860
ian@26 861 static int
ian@26 862 p2m_expose_init(void)
ian@26 863 {
ian@26 864 unsigned long num_pfn;
ian@26 865 unsigned long p2m_size = 0;
ian@26 866 unsigned long align = ~0UL;
ian@26 867 int error = 0;
ian@26 868 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
ian@26 869 int i;
ian@26 870 unsigned long log_page_size = 0;
ian@26 871 #endif
ian@26 872
ian@26 873 if (!xen_ia64_p2m_expose)
ian@26 874 return -ENOSYS;
ian@26 875 if (p2m_initialized)
ian@26 876 return 0;
ian@26 877
ian@26 878 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
ian@26 879 error = register_cpu_notifier(&p2m_expose_dtr_hotplug_notifier);
ian@26 880 if (error < 0)
ian@26 881 return error;
ian@26 882 #endif
ian@26 883
ian@26 884 lock_cpu_hotplug();
ian@26 885 if (p2m_initialized)
ian@26 886 goto out;
ian@26 887
ian@26 888 #ifdef CONFIG_DISCONTIGMEM
ian@26 889 p2m_min_low_pfn = min_low_pfn;
ian@26 890 p2m_max_low_pfn = max_low_pfn;
ian@26 891 #else
ian@26 892 p2m_min_low_pfn = 0;
ian@26 893 p2m_max_low_pfn = max_pfn;
ian@26 894 #endif
ian@26 895
ian@26 896 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
ian@26 897 if (xen_ia64_p2m_expose_use_dtr) {
alex@85 898 unsigned long page_size = 0;
ian@26 899 unsigned long granule_pfn = 0;
alex@197 900 p2m_size = p2m_table_size(p2m_max_low_pfn - p2m_min_low_pfn);
ian@26 901 for (i = 0;
ian@26 902 i < sizeof(p2m_page_shifts)/sizeof(p2m_page_shifts[0]);
ian@26 903 i++) {
ian@26 904 log_page_size = p2m_page_shifts[i];
ian@26 905 page_size = 1UL << log_page_size;
ian@26 906 if (page_size < p2m_size)
ian@26 907 continue;
ian@26 908
ian@26 909 granule_pfn = max(page_size >> PAGE_SHIFT,
alex@392 910 p2m_granule_pfn);
ian@26 911 p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn,
alex@392 912 granule_pfn);
ian@26 913 p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn,
alex@392 914 granule_pfn);
ian@26 915 num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn;
ian@26 916 p2m_expose_size = num_pfn << PAGE_SHIFT;
alex@197 917 p2m_size = p2m_table_size(num_pfn);
alex@392 918 p2m_size = ROUNDUP(p2m_size,
alex@392 919 granule_pfn << PAGE_SHIFT);
ian@26 920 if (p2m_size == page_size)
ian@26 921 break;
ian@26 922 }
ian@26 923 if (p2m_size != page_size) {
ian@26 924 printk(KERN_ERR "p2m_size != page_size\n");
ian@26 925 error = -EINVAL;
ian@26 926 goto out;
ian@26 927 }
ian@26 928 align = max(privcmd_resource_align, granule_pfn << PAGE_SHIFT);
ian@26 929 } else
ian@26 930 #endif
ian@26 931 {
ian@26 932 BUG_ON(p2m_granule_pfn & (p2m_granule_pfn - 1));
ian@26 933 p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn,
alex@392 934 p2m_granule_pfn);
alex@392 935 p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn,
alex@392 936 p2m_granule_pfn);
ian@26 937 num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn;
ian@26 938 p2m_expose_size = num_pfn << PAGE_SHIFT;
alex@197 939 p2m_size = p2m_table_size(num_pfn);
ian@26 940 p2m_size = ROUNDUP(p2m_size, p2m_granule_pfn << PAGE_SHIFT);
ian@26 941 align = max(privcmd_resource_align,
alex@392 942 p2m_granule_pfn << PAGE_SHIFT);
ian@26 943 }
ian@26 944
alex@392 945 /* use privcmd region */
ian@26 946 error = allocate_resource(&iomem_resource, &p2m_resource, p2m_size,
alex@392 947 privcmd_resource_min, privcmd_resource_max,
alex@392 948 align, NULL, NULL);
ian@26 949 if (error) {
ian@26 950 printk(KERN_ERR P2M_PREFIX
ian@26 951 "can't allocate region for p2m exposure "
alex@62 952 "[0x%016lx, 0x%016lx] 0x%016lx\n",
ian@26 953 p2m_convert_min_pfn, p2m_convert_max_pfn, p2m_size);
ian@26 954 goto out;
ian@26 955 }
ian@26 956
ian@26 957 p2m_assign_start_pfn = p2m_resource.start >> PAGE_SHIFT;
ian@26 958 p2m_assign_end_pfn = p2m_resource.end >> PAGE_SHIFT;
ian@26 959
ian@26 960 error = HYPERVISOR_expose_p2m(p2m_convert_min_pfn,
alex@392 961 p2m_assign_start_pfn,
alex@392 962 p2m_expose_size, p2m_granule_pfn);
ian@26 963 if (error) {
ian@26 964 printk(KERN_ERR P2M_PREFIX "failed expose p2m hypercall %d\n",
ian@26 965 error);
ian@26 966 printk(KERN_ERR P2M_PREFIX "conv 0x%016lx assign 0x%016lx "
ian@26 967 "expose_size 0x%016lx granule 0x%016lx\n",
ian@26 968 p2m_convert_min_pfn, p2m_assign_start_pfn,
ian@26 969 p2m_expose_size, p2m_granule_pfn);
ian@26 970 release_resource(&p2m_resource);
ian@26 971 goto out;
ian@26 972 }
ian@26 973 p2m_pte = (volatile const pte_t*)pfn_to_kaddr(p2m_assign_start_pfn);
ian@26 974 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
ian@26 975 if (xen_ia64_p2m_expose_use_dtr) {
ian@26 976 p2m_itr_arg.vaddr = (unsigned long)__va(p2m_assign_start_pfn
alex@392 977 << PAGE_SHIFT);
ian@26 978 p2m_itr_arg.pteval = pte_val(pfn_pte(p2m_assign_start_pfn,
alex@392 979 PAGE_KERNEL));
ian@26 980 p2m_itr_arg.log_page_size = log_page_size;
ian@26 981 smp_mb();
ian@26 982 smp_call_function(&p2m_itr, &p2m_itr_arg, 1, 1);
ian@26 983 p2m_itr(&p2m_itr_arg);
ian@26 984 }
ian@26 985 #endif
ian@26 986 smp_mb();
ian@26 987 p2m_initialized = 1;
ian@26 988 printk(P2M_PREFIX "assign p2m table of [0x%016lx, 0x%016lx)\n",
ian@26 989 p2m_convert_min_pfn << PAGE_SHIFT,
alex@62 990 (p2m_convert_max_pfn << PAGE_SHIFT) + PAGE_SIZE);
ian@26 991 printk(P2M_PREFIX "to [0x%016lx, 0x%016lx) (%ld KBytes)\n",
ian@26 992 p2m_assign_start_pfn << PAGE_SHIFT,
alex@62 993 (p2m_assign_end_pfn << PAGE_SHIFT) + PAGE_SIZE,
ian@26 994 p2m_size / 1024);
ian@26 995 out:
ian@26 996 unlock_cpu_hotplug();
ian@26 997 return error;
ian@26 998 }
ian@26 999
ian@26 1000 #ifdef notyet
ian@26 1001 void
ian@26 1002 p2m_expose_cleanup(void)
ian@26 1003 {
ian@26 1004 BUG_ON(!p2m_initialized);
ian@26 1005 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
ian@26 1006 unregister_cpu_notifier(&p2m_expose_dtr_hotplug_notifier);
ian@26 1007 #endif
ian@26 1008 release_resource(&p2m_resource);
ian@26 1009 }
ian@26 1010 #endif
ian@26 1011
ian@26 1012 static void
ian@26 1013 p2m_expose_resume(void)
ian@26 1014 {
ian@26 1015 int error;
ian@26 1016
ian@26 1017 if (!xen_ia64_p2m_expose || !p2m_initialized)
ian@26 1018 return;
ian@26 1019
ian@26 1020 /*
ian@26 1021 * We can't call {lock, unlock}_cpu_hotplug() because
ian@26 1022 * they require process context.
ian@26 1023 * We don't need them because we're the only running CPU
ian@26 1024 * and interrupts are masked during resume.
ian@26 1025 */
ian@26 1026 error = HYPERVISOR_expose_p2m(p2m_convert_min_pfn,
alex@392 1027 p2m_assign_start_pfn,
alex@392 1028 p2m_expose_size, p2m_granule_pfn);
ian@26 1029 if (error) {
ian@26 1030 printk(KERN_ERR P2M_PREFIX "failed expose p2m hypercall %d\n",
ian@26 1031 error);
ian@26 1032 printk(KERN_ERR P2M_PREFIX "conv 0x%016lx assign 0x%016lx "
ian@26 1033 "expose_size 0x%016lx granule 0x%016lx\n",
ian@26 1034 p2m_convert_min_pfn, p2m_assign_start_pfn,
ian@26 1035 p2m_expose_size, p2m_granule_pfn);
ian@26 1036 p2m_initialized = 0;
ian@26 1037 smp_mb();
ian@26 1038 ia64_ptr(0x2, p2m_itr_arg.vaddr, p2m_itr_arg.log_page_size);
ian@26 1039
ian@26 1040 /*
ian@26 1041 * We can't call those clean up functions because they
ian@26 1042 * require process context.
ian@26 1043 */
ian@26 1044 #if 0
ian@26 1045 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
ian@26 1046 if (xen_ia64_p2m_expose_use_dtr)
ian@26 1047 unregister_cpu_notifier(
ian@26 1048 &p2m_expose_dtr_hotplug_notifier);
ian@26 1049 #endif
ian@26 1050 release_resource(&p2m_resource);
ian@26 1051 #endif
ian@26 1052 }
ian@26 1053 }
ian@26 1054
alex@392 1055 /* XXX inline this? */
ian@26 1056 unsigned long
ian@26 1057 p2m_phystomach(unsigned long gpfn)
ian@26 1058 {
alex@392 1059 volatile const pte_t *pte;
ian@26 1060 unsigned long mfn;
ian@26 1061 unsigned long pteval;
ian@26 1062
ian@26 1063 if (!p2m_initialized ||
ian@26 1064 gpfn < p2m_min_low_pfn || gpfn > p2m_max_low_pfn
ian@26 1065 /* || !pfn_valid(gpfn) */)
ian@26 1066 return INVALID_MFN;
ian@26 1067 pte = p2m_pte + (gpfn - p2m_convert_min_pfn);
ian@26 1068
ian@26 1069 mfn = INVALID_MFN;
ian@26 1070 if (likely(__get_user(pteval, (unsigned long __user *)pte) == 0 &&
alex@392 1071 pte_present(__pte(pteval)) &&
alex@392 1072 pte_pfn(__pte(pteval)) != (INVALID_MFN >> PAGE_SHIFT)))
ian@26 1073 mfn = (pteval & _PFN_MASK) >> PAGE_SHIFT;
ian@26 1074
ian@26 1075 return mfn;
ian@26 1076 }
ian@26 1077
ian@26 1078 EXPORT_SYMBOL_GPL(p2m_initialized);
ian@26 1079 EXPORT_SYMBOL_GPL(p2m_min_low_pfn);
ian@26 1080 EXPORT_SYMBOL_GPL(p2m_max_low_pfn);
ian@26 1081 EXPORT_SYMBOL_GPL(p2m_convert_min_pfn);
ian@26 1082 EXPORT_SYMBOL_GPL(p2m_convert_max_pfn);
ian@26 1083 EXPORT_SYMBOL_GPL(p2m_pte);
ian@26 1084 EXPORT_SYMBOL_GPL(p2m_phystomach);
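
A hedged usage sketch for the lookup above, in the style of the
pfn_to_mfn_for_dma() caller referenced earlier in this file (the wrapper
name and error convention here are illustrative):

    /* Illustrative: resolve a guest pfn to a machine bus address,
     * treating INVALID_MFN as "not convertible". */
    static int gpfn_to_bus_sketch(unsigned long gpfn, dma_addr_t *bus)
    {
        unsigned long mfn = p2m_phystomach(gpfn);

        if (mfn == INVALID_MFN)
            return -EFAULT; /* table not exposed, or pfn out of range */
        *bus = (dma_addr_t)mfn << PAGE_SHIFT;
        return 0;
    }
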
alex@197 1085
alex@392 1086 /**************************************************************************
alex@392 1087 * foreign domain p2m mapping
alex@392 1088 */
alex@197 1089 #include <asm/xen/xencomm.h>
alex@197 1090 #include <xen/public/privcmd.h>
alex@197 1091
alex@197 1092 struct foreign_p2m_private {
alex@197 1093 unsigned long gpfn;
alex@197 1094 domid_t domid;
alex@197 1095 };
alex@197 1096
alex@197 1097 static void
alex@392 1098 xen_foreign_p2m_unexpose(struct xen_ia64_privcmd_range *privcmd_range,
alex@392 1099 void *arg)
alex@197 1100 {
alex@392 1101 struct foreign_p2m_private *private = (struct foreign_p2m_private*)arg;
alex@197 1102 int ret;
alex@197 1103
alex@197 1104 privcmd_range->private = NULL;
alex@197 1105 privcmd_range->callback = NULL;
alex@197 1106
alex@197 1107 ret = HYPERVISOR_unexpose_foreign_p2m(private->gpfn, private->domid);
alex@197 1108 if (ret)
alex@197 1109 printk(KERN_WARNING
alex@197 1110 "unexpose_foreign_p2m hypercall failed.\n");
alex@197 1111 kfree(private);
alex@197 1112 }
alex@197 1113
alex@197 1114 int
alex@392 1115 xen_foreign_p2m_expose(privcmd_hypercall_t *hypercall)
alex@197 1116 {
alex@392 1117 /*
alex@392 1118 * hypercall->
alex@392 1119 * arg0: cmd = IA64_DOM0VP_expose_foreign_p2m
alex@392 1120 * arg1: va
alex@392 1121 * arg2: domid
alex@392 1122 * arg3: __user* memmap_info
alex@392 1123 * arg4: flags
alex@392 1124 */
alex@197 1125
alex@197 1126 int ret = 0;
alex@392 1127 struct mm_struct *mm = current->mm;
alex@197 1128
alex@197 1129 unsigned long vaddr = hypercall->arg[1];
alex@197 1130 domid_t domid = hypercall->arg[2];
alex@197 1131 struct xen_ia64_memmap_info __user *u_memmap_info =
alex@197 1132 (struct xen_ia64_memmap_info __user *)hypercall->arg[3];
alex@197 1133
alex@197 1134 struct xen_ia64_memmap_info memmap_info;
alex@197 1135 size_t memmap_size;
alex@392 1136 struct xen_ia64_memmap_info *k_memmap_info = NULL;
alex@197 1137 long max_gpfn; /* signed: the hypercall may return a negative error */
alex@197 1138 unsigned long p2m_size;
alex@392 1139 struct resource *res;
alex@197 1140 unsigned long gpfn;
alex@197 1141
alex@392 1142 struct vm_area_struct *vma;
alex@392 1143 void *p;
alex@197 1144 unsigned long prev_src_gpfn_end;
alex@197 1145
alex@392 1146 struct xen_ia64_privcmd_vma *privcmd_vma;
alex@392 1147 struct xen_ia64_privcmd_range *privcmd_range;
alex@392 1148 struct foreign_p2m_private *private = NULL;
alex@197 1149
alex@197 1150 BUG_ON(hypercall->arg[0] != IA64_DOM0VP_expose_foreign_p2m);
alex@197 1151
alex@197 1152 private = kmalloc(sizeof(*private), GFP_KERNEL);
alex@197 1153 if (private == NULL)
alex@197 1154 goto kfree_out;
alex@197 1155
alex@197 1156 if (copy_from_user(&memmap_info, u_memmap_info, sizeof(memmap_info))) {
alex@197 1157 ret = -EFAULT;
alex@197 1158 goto kfree_out; /* don't leak 'private' */
alex@197 1159 }
alex@197 1158 /* memmap_info integrity check */
alex@197 1159 if (memmap_info.efi_memdesc_size < sizeof(efi_memory_desc_t) ||
alex@197 1160 memmap_info.efi_memmap_size < memmap_info.efi_memdesc_size ||
alex@197 1161 (memmap_info.efi_memmap_size % memmap_info.efi_memdesc_size)
alex@197 1162 != 0) {
alex@197 1163 ret = -EINVAL;
alex@197 1164 goto kfree_out;
alex@197 1165 }
alex@197 1166
alex@197 1167 memmap_size = sizeof(*k_memmap_info) + memmap_info.efi_memmap_size;
alex@197 1168 k_memmap_info = kmalloc(memmap_size, GFP_KERNEL);
alex@197 1169 if (k_memmap_info == NULL) {
alex@197 1170 ret = -ENOMEM;
alex@197 1171 goto kfree_out; /* don't leak 'private' */
alex@197 1172 }
alex@197 1171 if (copy_from_user(k_memmap_info, u_memmap_info, memmap_size)) {
alex@197 1172 ret = -EFAULT;
alex@197 1173 goto kfree_out;
alex@197 1174 }
alex@197 1175 /* k_memmap_info integrity check is done by the expose foreign p2m
alex@197 1176 hypercall */
alex@197 1177
alex@197 1178 max_gpfn = HYPERVISOR_memory_op(XENMEM_maximum_gpfn, &domid);
alex@197 1179 if (max_gpfn < 0) {
alex@197 1180 ret = max_gpfn;
alex@197 1181 goto kfree_out;
alex@197 1182 }
alex@197 1183 p2m_size = p2m_table_size(max_gpfn + 1);
alex@197 1184
alex@197 1185 down_write(&mm->mmap_sem);
alex@197 1186
alex@197 1187 vma = find_vma(mm, vaddr);
alex@197 1188 if (vma == NULL || vma->vm_ops != &xen_ia64_privcmd_vm_ops ||
alex@197 1189 vaddr != vma->vm_start ||
alex@197 1190 (vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_EXEC) ||
alex@197 1191 !privcmd_enforce_singleshot_mapping(vma))
alex@197 1192 goto mmap_out;
alex@197 1193
alex@197 1194 privcmd_vma = (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
alex@197 1195 res = privcmd_vma->range->res;
alex@197 1196 if (p2m_size > (res->end - res->start + 1) ||
alex@197 1197 p2m_size > vma->vm_end - vma->vm_start) {
alex@197 1198 ret = -EINVAL;
alex@197 1199 goto mmap_out;
alex@197 1200 }
alex@197 1201
alex@197 1202 gpfn = res->start >> PAGE_SHIFT;
alex@392 1203 /*
alex@392 1204 * arg0: dest_gpfn
alex@392 1205 * arg1: domid
alex@392 1206 * arg2: XEN_GUEST_HANDLE(char) buffer: memmap_info
alex@392 1207 * arg3: flags
alex@392 1208 * The hypercall checks its integrity, simplifies it and
alex@392 1209 * copies it back for us.
alex@392 1210 */
alex@197 1211 ret = xencomm_arch_expose_foreign_p2m(gpfn, domid,
alex@197 1212 xencomm_map_no_alloc(k_memmap_info, memmap_size),
alex@197 1213 hypercall->arg[4]);
alex@197 1214 if (ret)
alex@197 1215 goto mmap_out;
alex@197 1216
alex@197 1217 privcmd_range = (struct xen_ia64_privcmd_range*)privcmd_vma->range;
alex@197 1218 prev_src_gpfn_end = 0;
alex@197 1219 for (p = k_memmap_info->memdesc;
alex@197 1220 p < (void*)&k_memmap_info->memdesc[0] +
alex@197 1221 k_memmap_info->efi_memmap_size;
alex@197 1222 p += k_memmap_info->efi_memdesc_size) {
alex@197 1223 efi_memory_desc_t* md = p;
alex@197 1224 unsigned long src_gpfn = md->phys_addr >> PAGE_SHIFT;
alex@197 1225 unsigned long src_gpfn_end =
alex@197 1226 (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >>
alex@197 1227 PAGE_SHIFT;
alex@197 1228 unsigned long num_src_gpfn;
alex@197 1229 unsigned long gpfn_offset;
alex@197 1230 unsigned long size;
alex@197 1231 unsigned int i;
alex@197 1232
alex@197 1233 if (src_gpfn <= prev_src_gpfn_end)
alex@197 1234 src_gpfn = prev_src_gpfn_end + 1;
alex@197 1235 if (src_gpfn_end <= prev_src_gpfn_end)
alex@197 1236 continue;
alex@197 1237
alex@197 1238 src_gpfn &= ~(PTRS_PER_PTE - 1);
alex@197 1239 src_gpfn_end = (src_gpfn_end + PTRS_PER_PTE - 1) &
alex@197 1240 ~(PTRS_PER_PTE - 1);
alex@197 1241 num_src_gpfn = src_gpfn_end - src_gpfn;
alex@197 1242 gpfn_offset = src_gpfn / PTRS_PER_PTE;
alex@197 1243 size = p2m_table_size(num_src_gpfn);
alex@197 1244
alex@197 1245 prev_src_gpfn_end = src_gpfn_end;
alex@197 1246 ret = remap_pfn_range(vma,
alex@197 1247 vaddr + (gpfn_offset << PAGE_SHIFT),
alex@197 1248 gpfn + gpfn_offset, size,
alex@197 1249 vma->vm_page_prot);
alex@197 1250 if (ret) {
alex@197 1251 for (i = 0; i < gpfn + gpfn_offset; i++) {
alex@392 1252 struct xen_ia64_privcmd_entry *entry =
alex@197 1253 &privcmd_range->entries[i];
alex@197 1254 BUG_ON(atomic_read(&entry->map_count) != 1 &&
alex@197 1255 atomic_read(&entry->map_count) != 0);
alex@197 1256 atomic_set(&entry->map_count, 0);
alex@197 1257 entry->gpfn = INVALID_GPFN;
alex@197 1258 }
alex@197 1259 (void)HYPERVISOR_unexpose_foreign_p2m(gpfn, domid);
alex@197 1260 goto mmap_out;
alex@197 1261 }
alex@197 1262
alex@197 1263 for (i = gpfn_offset;
alex@197 1264 i < gpfn_offset + (size >> PAGE_SHIFT);
alex@197 1265 i++) {
alex@392 1266 struct xen_ia64_privcmd_entry *entry =
alex@197 1267 &privcmd_range->entries[i];
alex@197 1268 BUG_ON(atomic_read(&entry->map_count) != 0);
alex@197 1269 BUG_ON(entry->gpfn != INVALID_GPFN);
alex@197 1270 atomic_inc(&entry->map_count);
alex@197 1271 entry->gpfn = gpfn + i;
alex@197 1272 }
alex@197 1273 }
alex@197 1274
alex@197 1275 private->gpfn = gpfn;
alex@197 1276 private->domid = domid;
alex@197 1277
alex@197 1278 privcmd_range->callback = &xen_foreign_p2m_unexpose;
alex@197 1279 privcmd_range->private = private;
alex@197 1280
alex@197 1281 mmap_out:
alex@197 1282 up_write(&mm->mmap_sem);
alex@197 1283 kfree_out:
alex@197 1284 kfree(k_memmap_info);
alex@197 1285 if (ret != 0)
alex@197 1286 kfree(private);
alex@197 1287 return ret;
alex@197 1288 }
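
From user space, a toolstack reaches this path through the privcmd
device, filling the argument slots exactly as the comment at the top of
xen_foreign_p2m_expose() describes. A hedged sketch; the header path,
the __HYPERVISOR_ia64_dom0vp_op op number and the exact ioctl plumbing
are assumptions based on Xen's public privcmd interface:

    #include <sys/ioctl.h>
    #include <xen/sys/privcmd.h>   /* assumed header location */

    static int expose_foreign_p2m_sketch(int privcmd_fd, void *mapped_va,
                                         unsigned short domid,
                                         void *memmap_info)
    {
        privcmd_hypercall_t call = {
            .op  = __HYPERVISOR_ia64_dom0vp_op,       /* assumed op */
            .arg = {
                IA64_DOM0VP_expose_foreign_p2m,       /* arg0: cmd */
                (unsigned long)mapped_va,             /* arg1: va */
                domid,                                /* arg2: domid */
                (unsigned long)memmap_info,           /* arg3: memmap_info */
                0,                                    /* arg4: flags */
            },
        };
        return ioctl(privcmd_fd, IOCTL_PRIVCMD_HYPERCALL, &call);
    }
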
ian@26 1289 #endif
ian@26 1290
alex@392 1291 /**************************************************************************
alex@392 1292 * for xenoprof
alex@392 1293 */
ian@26 1294 struct resource*
ian@26 1295 xen_ia64_allocate_resource(unsigned long size)
ian@26 1296 {
alex@392 1297 struct resource *res;
ian@26 1298 int error;
ian@26 1299
alex@43 1300 res = kzalloc(sizeof(*res), GFP_KERNEL);
ian@26 1301 if (res == NULL)
ian@26 1302 return ERR_PTR(-ENOMEM);
ian@26 1303
ian@26 1304 res->name = "Xen";
ian@26 1305 res->flags = IORESOURCE_MEM;
ian@26 1306 error = allocate_resource(&iomem_resource, res, PAGE_ALIGN(size),
alex@392 1307 privcmd_resource_min, privcmd_resource_max,
alex@392 1308 IA64_GRANULE_SIZE, NULL, NULL);
ian@26 1309 if (error) {
ian@26 1310 kfree(res);
ian@26 1311 return ERR_PTR(error);
ian@26 1312 }
ian@26 1313 return res;
ian@26 1314 }
ian@26 1315 EXPORT_SYMBOL_GPL(xen_ia64_allocate_resource);
ian@26 1316
ian@26 1317 void
alex@392 1318 xen_ia64_release_resource(struct resource *res)
ian@26 1319 {
ian@26 1320 release_resource(res);
ian@26 1321 kfree(res);
ian@26 1322 }
ian@26 1323 EXPORT_SYMBOL_GPL(xen_ia64_release_resource);
ian@26 1324
ian@26 1325 void
alex@392 1326 xen_ia64_unmap_resource(struct resource *res)
ian@26 1327 {
ian@26 1328 unsigned long gpfn = res->start >> PAGE_SHIFT;
ian@26 1329 unsigned long nr_pages = (res->end - res->start) >> PAGE_SHIFT;
ian@26 1330 unsigned long i;
ian@26 1331
ian@26 1332 for (i = 0; i < nr_pages; i++) {
ian@26 1333 int error = HYPERVISOR_zap_physmap(gpfn + i, 0);
ian@26 1334 if (error)
ian@26 1335 printk(KERN_ERR
ian@26 1336 "%s:%d zap_phsymap failed %d gpfn %lx\n",
ian@26 1337 __func__, __LINE__, error, gpfn + i);
ian@26 1338 }
ian@26 1339 xen_ia64_release_resource(res);
ian@26 1340 }
ian@26 1341 EXPORT_SYMBOL_GPL(xen_ia64_unmap_resource);
ian@26 1342
alex@392 1343 /**************************************************************************
alex@392 1344 * opt feature
alex@392 1345 */
alex@256 1346 void
alex@256 1347 xen_ia64_enable_opt_feature(void)
alex@256 1348 {
alex@256 1349 /* Enable region 7 identity map optimizations in Xen */
alex@256 1350 struct xen_ia64_opt_feature optf;
alex@256 1351
alex@256 1352 optf.cmd = XEN_IA64_OPTF_IDENT_MAP_REG7;
alex@256 1353 optf.on = XEN_IA64_OPTF_ON;
alex@256 1354 optf.pgprot = pgprot_val(PAGE_KERNEL);
alex@256 1355 optf.key = 0; /* No key on linux. */
alex@256 1356 HYPERVISOR_opt_feature(&optf);
alex@256 1357 }
alex@256 1358
alex@392 1359 /**************************************************************************
alex@392 1360 * suspend/resume
alex@392 1361 */
ian@26 1362 void
ian@26 1363 xen_post_suspend(int suspend_cancelled)
ian@26 1364 {
ian@26 1365 if (suspend_cancelled)
ian@26 1366 return;
ian@26 1367
ian@26 1368 p2m_expose_resume();
alex@256 1369 xen_ia64_enable_opt_feature();
ian@26 1370 /* add more if necessary */
ian@26 1371 }