ia64/linux-2.6.18-xen.hg

arch/i386/mm/hypervisor.c @ 892:485fe5efa4ff

linux/blktap2: allow to build as module

... and also allow it to interact with blkback when that is also built as
a module.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Jun 04 10:32:34 2009 +0100 (2009-06-04)
parents 5e1269aa5c29
children f994bfe9b93b
/******************************************************************************
 * mm/hypervisor.c
 *
 * Update page tables via the hypervisor.
 *
 * Copyright (c) 2002-2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/hypervisor.h>
#include <xen/balloon.h>
#include <xen/features.h>
#include <xen/interface/memory.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <asm/tlbflush.h>
#include <linux/highmem.h>

void xen_l1_entry_update(pte_t *ptr, pte_t val)
{
	mmu_update_t u;
	u.ptr = ptep_to_machine(ptr);
	u.val = __pte_val(val);
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
EXPORT_SYMBOL_GPL(xen_l1_entry_update);
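
/*
 * Illustrative sketch (not part of the original file): a paravirtualised
 * caller replaces a direct PTE store with xen_l1_entry_update() so the
 * hypervisor can validate the new entry before it takes effect.  The
 * helper below and its arguments are hypothetical.
 */
#if 0
static void example_set_pte(pte_t *ptep, unsigned long pfn)
{
	/* Ask Xen to install a kernel mapping of @pfn at this PTE slot. */
	xen_l1_entry_update(ptep, pfn_pte(pfn, PAGE_KERNEL));
}
#endif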

void xen_l2_entry_update(pmd_t *ptr, pmd_t val)
{
	mmu_update_t u;
	u.ptr = virt_to_machine(ptr);
	u.val = __pmd_val(val);
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}

#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
void xen_l3_entry_update(pud_t *ptr, pud_t val)
{
	mmu_update_t u;
	u.ptr = virt_to_machine(ptr);
	u.val = __pud_val(val);
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
#endif

#ifdef CONFIG_X86_64
void xen_l4_entry_update(pgd_t *ptr, pgd_t val)
{
	mmu_update_t u;
	u.ptr = virt_to_machine(ptr);
	u.val = __pgd_val(val);
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
#endif /* CONFIG_X86_64 */
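
/*
 * Illustrative sketch (not part of the original file): each helper above
 * issues a one-element HYPERVISOR_mmu_update hypercall; a caller touching
 * several entries at once can batch them into a single request.  The
 * function below is hypothetical.
 */
#if 0
static void example_update_ptes(pte_t *ptep, const pte_t *vals, unsigned int nr)
{
	mmu_update_t req[16];
	unsigned int j;

	BUG_ON(nr > ARRAY_SIZE(req));
	for (j = 0; j < nr; j++) {
		req[j].ptr = ptep_to_machine(ptep + j);
		req[j].val = __pte_val(vals[j]);
	}
	/* One hypercall validates and applies all @nr updates. */
	BUG_ON(HYPERVISOR_mmu_update(req, nr, NULL, DOMID_SELF) < 0);
}
#endif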

void xen_pt_switch(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_NEW_BASEPTR;
	op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_new_user_pt(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_NEW_USER_BASEPTR;
	op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_tlb_flush(void)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
EXPORT_SYMBOL(xen_tlb_flush);

void xen_invlpg(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_INVLPG_LOCAL;
	op.arg1.linear_addr = ptr & PAGE_MASK;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
EXPORT_SYMBOL(xen_invlpg);
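
/*
 * Illustrative sketch (not part of the original file): after rewriting a
 * single live kernel mapping, a local single-address flush suffices;
 * xen_tlb_flush() is the heavier whole-TLB alternative.  The helper below
 * is hypothetical.
 */
#if 0
static void example_remap_and_flush(unsigned long vaddr, unsigned long pfn)
{
	pte_t *ptep = lookup_address(vaddr);	/* i386 pageattr helper */

	xen_l1_entry_update(ptep, pfn_pte(pfn, PAGE_KERNEL));
	xen_invlpg(vaddr);			/* this CPU, this page only */
}
#endif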

#ifdef CONFIG_SMP

void xen_tlb_flush_all(void)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_TLB_FLUSH_ALL;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
EXPORT_SYMBOL_GPL(xen_tlb_flush_all);

void xen_tlb_flush_mask(cpumask_t *mask)
{
	struct mmuext_op op;
	if ( cpus_empty(*mask) )
		return;
	op.cmd = MMUEXT_TLB_FLUSH_MULTI;
	set_xen_guest_handle(op.arg2.vcpumask, mask->bits);
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
EXPORT_SYMBOL_GPL(xen_tlb_flush_mask);

void xen_invlpg_all(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_INVLPG_ALL;
	op.arg1.linear_addr = ptr & PAGE_MASK;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_invlpg_mask(cpumask_t *mask, unsigned long ptr)
{
	struct mmuext_op op;
	if ( cpus_empty(*mask) )
		return;
	op.cmd = MMUEXT_INVLPG_MULTI;
	op.arg1.linear_addr = ptr & PAGE_MASK;
	set_xen_guest_handle(op.arg2.vcpumask, mask->bits);
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

#endif /* CONFIG_SMP */
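
/*
 * Illustrative sketch (not part of the original file): shooting down one
 * user mapping on every CPU that may have cached it, using the mm's CPU
 * mask.  The helper is hypothetical and assumes it runs with preemption
 * disabled, as the real flush paths do.
 */
#if 0
static void example_shootdown_page(struct mm_struct *mm, unsigned long vaddr)
{
	cpumask_t mask = mm->cpu_vm_mask;

	cpu_clear(smp_processor_id(), mask);
	xen_invlpg_mask(&mask, vaddr);	/* remote CPUs, one hypercall */
	xen_invlpg(vaddr);		/* this CPU */
}
#endif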

void xen_pgd_pin(unsigned long ptr)
{
	struct mmuext_op op;
#ifdef CONFIG_X86_64
	op.cmd = MMUEXT_PIN_L4_TABLE;
#elif defined(CONFIG_X86_PAE)
	op.cmd = MMUEXT_PIN_L3_TABLE;
#else
	op.cmd = MMUEXT_PIN_L2_TABLE;
#endif
	op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_pgd_unpin(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_UNPIN_TABLE;
	op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
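
/*
 * Illustrative sketch (not part of the original file): pinning asks Xen to
 * validate a page-table tree once and keep it validated; both helpers take
 * the physical address of the top-level table.  The function below is
 * hypothetical; the real pin/unpin calls live in the pgd allocation and mm
 * teardown paths.
 */
#if 0
static void example_pin_cycle(struct mm_struct *mm)
{
	xen_pgd_pin(__pa(mm->pgd));	/* before the pgd is loaded into cr3 */
	/* ... the mm is used ... */
	xen_pgd_unpin(__pa(mm->pgd));	/* before the pgd page is freed */
}
#endif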

void xen_set_ldt(const void *ptr, unsigned int ents)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_SET_LDT;
	op.arg1.linear_addr = (unsigned long)ptr;
	op.arg2.nr_ents = ents;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
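
/*
 * Illustrative sketch (not part of the original file): a context-switch
 * path hands the incoming task's LDT to Xen instead of executing lldt
 * directly.  The helper is hypothetical.
 */
#if 0
static void example_load_ldt(struct mm_struct *next)
{
	xen_set_ldt(next->context.ldt, next->context.size);
}
#endif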

/* Protected by balloon_lock. */
#define MAX_CONTIG_ORDER 9 /* 2MB */
static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
static unsigned long limited_frames[1<<MAX_CONTIG_ORDER];
static multicall_entry_t cr_mcl[1<<MAX_CONTIG_ORDER];

/* Ensure multi-page extents are contiguous in machine memory. */
int xen_create_contiguous_region(
	unsigned long vstart, unsigned int order, unsigned int address_bits)
{
	unsigned long *in_frames = discontig_frames, out_frame;
	unsigned long frame, flags;
	unsigned int i;
	int rc, success;
	struct xen_memory_exchange exchange = {
		.in = {
			.nr_extents = 1UL << order,
			.extent_order = 0,
			.domid = DOMID_SELF
		},
		.out = {
			.nr_extents = 1,
			.extent_order = order,
			.address_bits = address_bits,
			.domid = DOMID_SELF
		}
	};

	/*
	 * Currently an auto-translated guest will not perform I/O, nor will
	 * it require PAE page directories below 4GB. Therefore any calls to
	 * this function are redundant and can be ignored.
	 */
	if (xen_feature(XENFEAT_auto_translated_physmap))
		return 0;

	if (unlikely(order > MAX_CONTIG_ORDER))
		return -ENOMEM;

	set_xen_guest_handle(exchange.in.extent_start, in_frames);
	set_xen_guest_handle(exchange.out.extent_start, &out_frame);

	scrub_pages((void *)vstart, 1 << order);

	balloon_lock(flags);

	/* 1. Zap current PTEs, remembering MFNs. */
	for (i = 0; i < (1U<<order); i++) {
		in_frames[i] = pfn_to_mfn((__pa(vstart) >> PAGE_SHIFT) + i);
		MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
					__pte_ma(0), 0);
		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
				    INVALID_P2M_ENTRY);
	}
	if (HYPERVISOR_multicall_check(cr_mcl, i, NULL))
		BUG();

	/* 2. Get a new contiguous memory extent. */
	out_frame = __pa(vstart) >> PAGE_SHIFT;
	rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
	success = (exchange.nr_exchanged == (1UL << order));
	BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
	BUG_ON(success && (rc != 0));
#if CONFIG_XEN_COMPAT <= 0x030002
	if (unlikely(rc == -ENOSYS)) {
		/* Compatibility when XENMEM_exchange is unsupported. */
		if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					 &exchange.in) != (1UL << order))
			BUG();
		success = (HYPERVISOR_memory_op(XENMEM_populate_physmap,
						&exchange.out) == 1);
		if (!success) {
			/* Couldn't get special memory: fall back to normal. */
			for (i = 0; i < (1U<<order); i++)
				in_frames[i] = (__pa(vstart)>>PAGE_SHIFT) + i;
			if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
						 &exchange.in) != (1UL<<order))
				BUG();
		}
	}
#endif

	/* 3. Map the new extent in place of old pages. */
	for (i = 0; i < (1U<<order); i++) {
		frame = success ? (out_frame + i) : in_frames[i];
		MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
					pfn_pte_ma(frame, PAGE_KERNEL), 0);
		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
	}

	cr_mcl[i - 1].args[MULTI_UVMFLAGS_INDEX] = order
						   ? UVMF_TLB_FLUSH|UVMF_ALL
						   : UVMF_INVLPG|UVMF_ALL;
	if (HYPERVISOR_multicall_check(cr_mcl, i, NULL))
		BUG();

	balloon_unlock(flags);

	return success ? 0 : -ENOMEM;
}
EXPORT_SYMBOL_GPL(xen_create_contiguous_region);
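
/*
 * Illustrative sketch (not part of the original file): how a DMA allocator
 * might use the call above to obtain a buffer that is machine-contiguous
 * and below 4GB.  The helpers and their error handling are hypothetical.
 */
#if 0
static void *example_alloc_dma_buffer(unsigned int order)
{
	unsigned long vstart = __get_free_pages(GFP_KERNEL, order);

	if (!vstart)
		return NULL;
	/* 32 address bits => usable with a 32-bit DMA mask. */
	if (xen_create_contiguous_region(vstart, order, 32)) {
		free_pages(vstart, order);
		return NULL;
	}
	return (void *)vstart;
}

static void example_free_dma_buffer(void *vaddr, unsigned int order)
{
	/* Return the contiguous machine extent before freeing the pages. */
	xen_destroy_contiguous_region((unsigned long)vaddr, order);
	free_pages((unsigned long)vaddr, order);
}
#endif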

void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
{
	unsigned long *out_frames = discontig_frames, in_frame;
	unsigned long frame, flags;
	unsigned int i;
	int rc, success;
	struct xen_memory_exchange exchange = {
		.in = {
			.nr_extents = 1,
			.extent_order = order,
			.domid = DOMID_SELF
		},
		.out = {
			.nr_extents = 1UL << order,
			.extent_order = 0,
			.domid = DOMID_SELF
		}
	};

	if (xen_feature(XENFEAT_auto_translated_physmap))
		return;

	if (unlikely(order > MAX_CONTIG_ORDER))
		return;

	set_xen_guest_handle(exchange.in.extent_start, &in_frame);
	set_xen_guest_handle(exchange.out.extent_start, out_frames);

	scrub_pages((void *)vstart, 1 << order);

	balloon_lock(flags);

	/* 1. Find start MFN of contiguous extent. */
	in_frame = pfn_to_mfn(__pa(vstart) >> PAGE_SHIFT);

	/* 2. Zap current PTEs. */
	for (i = 0; i < (1U<<order); i++) {
		MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
					__pte_ma(0), 0);
		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
				    INVALID_P2M_ENTRY);
		out_frames[i] = (__pa(vstart) >> PAGE_SHIFT) + i;
	}
	if (HYPERVISOR_multicall_check(cr_mcl, i, NULL))
		BUG();

	/* 3. Do the exchange for non-contiguous MFNs. */
	rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
	success = (exchange.nr_exchanged == 1);
	BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
	BUG_ON(success && (rc != 0));
#if CONFIG_XEN_COMPAT <= 0x030002
	if (unlikely(rc == -ENOSYS)) {
		/* Compatibility when XENMEM_exchange is unsupported. */
		if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					 &exchange.in) != 1)
			BUG();
		if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
					 &exchange.out) != (1UL << order))
			BUG();
		success = 1;
	}
#endif

	/* 4. Map new pages in place of old pages. */
	for (i = 0; i < (1U<<order); i++) {
		frame = success ? out_frames[i] : (in_frame + i);
		MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
					pfn_pte_ma(frame, PAGE_KERNEL), 0);
		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
	}

	cr_mcl[i - 1].args[MULTI_UVMFLAGS_INDEX] = order
						   ? UVMF_TLB_FLUSH|UVMF_ALL
						   : UVMF_INVLPG|UVMF_ALL;
	if (HYPERVISOR_multicall_check(cr_mcl, i, NULL))
		BUG();

	balloon_unlock(flags);
}
EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);

static void undo_limit_pages(struct page *pages, unsigned int order)
{
	BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
	BUG_ON(order > MAX_CONTIG_ORDER);
	xen_limit_pages_to_max_mfn(pages, order, 0);
	ClearPageForeign(pages);
	__free_pages(pages, order);
}

int xen_limit_pages_to_max_mfn(
	struct page *pages, unsigned int order, unsigned int address_bits)
{
	unsigned long flags, frame;
	unsigned long *in_frames = discontig_frames, *out_frames = limited_frames;
	struct page *page;
	unsigned int i, n, nr_mcl;
	int rc, success;
	DECLARE_BITMAP(limit_map, 1 << MAX_CONTIG_ORDER);

	struct xen_memory_exchange exchange = {
		.in = {
			.extent_order = 0,
			.domid = DOMID_SELF
		},
		.out = {
			.extent_order = 0,
			.address_bits = address_bits,
			.domid = DOMID_SELF
		}
	};

	if (xen_feature(XENFEAT_auto_translated_physmap))
		return 0;

	if (unlikely(order > MAX_CONTIG_ORDER))
		return -ENOMEM;

	if (address_bits) {
		if (address_bits < PAGE_SHIFT)
			return -EINVAL;
		bitmap_zero(limit_map, 1U << order);
	} else if (order) {
		BUILD_BUG_ON(sizeof(pages->index) != sizeof(*limit_map));
		for (i = 0; i < BITS_TO_LONGS(1U << order); ++i)
			limit_map[i] = pages[i + 1].index;
	} else
		__set_bit(0, limit_map);

	set_xen_guest_handle(exchange.in.extent_start, in_frames);
	set_xen_guest_handle(exchange.out.extent_start, out_frames);

	/* 0. Scrub the pages. */
	for (i = 0, n = 0; i < 1U<<order ; i++) {
		page = &pages[i];
		if (address_bits) {
			if (!(pfn_to_mfn(page_to_pfn(page)) >> (address_bits - PAGE_SHIFT)))
				continue;
			__set_bit(i, limit_map);
		}

		if (!PageHighMem(page))
			scrub_pages(page_address(page), 1);
#ifdef CONFIG_XEN_SCRUB_PAGES
		else {
			scrub_pages(kmap(page), 1);
			kunmap(page);
			++n;
		}
#endif
	}
	if (bitmap_empty(limit_map, 1U << order))
		return 0;

	if (n)
		kmap_flush_unused();

	balloon_lock(flags);

	/* 1. Zap current PTEs (if any), remembering MFNs. */
	for (i = 0, n = 0, nr_mcl = 0; i < (1U<<order); i++) {
		if(!test_bit(i, limit_map))
			continue;
		page = &pages[i];

		out_frames[n] = page_to_pfn(page);
		in_frames[n] = pfn_to_mfn(out_frames[n]);

		if (!PageHighMem(page))
			MULTI_update_va_mapping(cr_mcl + nr_mcl++,
						(unsigned long)page_address(page),
						__pte_ma(0), 0);

		set_phys_to_machine(out_frames[n], INVALID_P2M_ENTRY);
		++n;
	}
	if (nr_mcl && HYPERVISOR_multicall_check(cr_mcl, nr_mcl, NULL))
		BUG();

	/* 2. Get new memory below the required limit. */
	exchange.in.nr_extents = n;
	exchange.out.nr_extents = n;
	rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
	success = (exchange.nr_exchanged == n);
	BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
	BUG_ON(success && (rc != 0));
#if CONFIG_XEN_COMPAT <= 0x030002
	if (unlikely(rc == -ENOSYS)) {
		/* Compatibility when XENMEM_exchange is unsupported. */
		if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					 &exchange.in) != n)
			BUG();
		if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
					 &exchange.out) != n)
			BUG();
		success = 1;
	}
#endif

	/* 3. Map the new pages in place of old pages. */
	for (i = 0, n = 0, nr_mcl = 0; i < (1U<<order); i++) {
		if(!test_bit(i, limit_map))
			continue;
		page = &pages[i];

		frame = success ? out_frames[n] : in_frames[n];

		if (!PageHighMem(page))
			MULTI_update_va_mapping(cr_mcl + nr_mcl++,
						(unsigned long)page_address(page),
						pfn_pte_ma(frame, PAGE_KERNEL), 0);

		set_phys_to_machine(page_to_pfn(page), frame);
		++n;
	}
	if (nr_mcl) {
		cr_mcl[nr_mcl - 1].args[MULTI_UVMFLAGS_INDEX] = order
							? UVMF_TLB_FLUSH|UVMF_ALL
							: UVMF_INVLPG|UVMF_ALL;
		if (HYPERVISOR_multicall_check(cr_mcl, nr_mcl, NULL))
			BUG();
	}

	balloon_unlock(flags);

	if (!success)
		return -ENOMEM;

	if (address_bits) {
		if (order) {
			BUILD_BUG_ON(sizeof(*limit_map) != sizeof(pages->index));
			for (i = 0; i < BITS_TO_LONGS(1U << order); ++i)
				pages[i + 1].index = limit_map[i];
		}
		SetPageForeign(pages, undo_limit_pages);
	}

	return 0;
}
EXPORT_SYMBOL_GPL(xen_limit_pages_to_max_mfn);
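
/*
 * Illustrative sketch (not part of the original file): a backend driver
 * needing frames a 32-bit-capable device can reach could constrain an
 * existing allocation like this.  On success the head page is marked
 * PageForeign with undo_limit_pages(), so the exchange is undone when the
 * pages are eventually freed.  The helper is hypothetical.
 */
#if 0
static struct page *example_alloc_low_pages(unsigned int order)
{
	struct page *pages = alloc_pages(GFP_KERNEL, order);

	if (pages && xen_limit_pages_to_max_mfn(pages, order, 32)) {
		__free_pages(pages, order);
		pages = NULL;
	}
	return pages;
}
#endif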

#ifdef __i386__
int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b)
{
	__u32 *lp = (__u32 *)((char *)ldt + entry * 8);
	maddr_t mach_lp = arbitrary_virt_to_machine(lp);
	return HYPERVISOR_update_descriptor(
		mach_lp, (u64)entry_a | ((u64)entry_b<<32));
}
#endif

#define MAX_BATCHED_FULL_PTES 32

int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
			 unsigned long addr, unsigned long end, pgprot_t newprot)
{
	int rc = 0, i = 0;
	mmu_update_t u[MAX_BATCHED_FULL_PTES];
	pte_t *pte;
	spinlock_t *ptl;

	if (!xen_feature(XENFEAT_mmu_pt_update_preserve_ad))
		return 0;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	do {
		if (pte_present(*pte)) {
			u[i].ptr = (__pmd_val(*pmd) & PHYSICAL_PAGE_MASK)
				   | ((unsigned long)pte & ~PAGE_MASK)
				   | MMU_PT_UPDATE_PRESERVE_AD;
			u[i].val = __pte_val(pte_modify(*pte, newprot));
			if (++i == MAX_BATCHED_FULL_PTES) {
				if ((rc = HYPERVISOR_mmu_update(
					&u[0], i, NULL, DOMID_SELF)) != 0)
					break;
				i = 0;
			}
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
	if (i)
		rc = HYPERVISOR_mmu_update( &u[0], i, NULL, DOMID_SELF);
	pte_unmap_unlock(pte - 1, ptl);
	BUG_ON(rc && rc != -ENOSYS);
	return !rc;
}
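
/*
 * Illustrative sketch (not part of the original file): mprotect's PTE walk
 * can try the batched hypercall first and only fall back to the generic
 * per-PTE loop when xen_change_pte_range() returns 0 (feature absent or
 * hypercall unsupported).  The caller shown is hypothetical.
 */
#if 0
static void example_change_protection(struct mm_struct *mm, pmd_t *pmd,
				       unsigned long addr, unsigned long end,
				       pgprot_t newprot)
{
	if (xen_change_pte_range(mm, pmd, addr, end, newprot))
		return;		/* whole range updated, A/D bits preserved */
	/* ... otherwise fall back to the generic per-PTE update loop ... */
}
#endif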