ia64/linux-2.6.18-xen.hg

view arch/i386/mm/hypervisor.c @ 761:5e1269aa5c29

blktap, gntdev: fix highpte handling

In case of highpte, virt_to_machine() can't be used. Introduce
ptep_to_machine() and use it, also to simplify xen_l1_entry_update().

Original patch from: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Dec 10 13:32:32 2008 +0000 (2008-12-10)
parents 39a8680e7a70
children 485fe5efa4ff
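
For reference, ptep_to_machine() itself is introduced by the accompanying header changes and does not appear in this file. The sketch below is illustrative only and makes assumptions: the name ptep_to_machine_sketch and the FIXADDR_START test are mine, not the actual implementation. The idea is that a lowmem PTE pointer lies in the kernel direct map, where virt_to_machine() works, while a highpte pointer comes from kmap_atomic(), so the machine frame has to be recovered from the struct page backing the mapping:

/* Illustrative sketch only -- not the real ptep_to_machine(). */
static inline maddr_t ptep_to_machine_sketch(pte_t *ptep)
{
	unsigned long va = (unsigned long)ptep;

#ifdef CONFIG_HIGHPTE
	/* kmap_atomic() addresses live in the fixmap area, not the direct map. */
	if (va >= FIXADDR_START) {
		struct page *pg = kmap_atomic_to_page(ptep);
		return ((maddr_t)pfn_to_mfn(page_to_pfn(pg)) << PAGE_SHIFT)
		       | (va & ~PAGE_MASK);
	}
#endif
	return virt_to_machine(ptep);
}

With such a helper, xen_l1_entry_update() below no longer needs to special-case highpte itself.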
line source
/******************************************************************************
 * mm/hypervisor.c
 *
 * Update page tables via the hypervisor.
 *
 * Copyright (c) 2002-2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/hypervisor.h>
#include <xen/balloon.h>
#include <xen/features.h>
#include <xen/interface/memory.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <asm/tlbflush.h>
#include <linux/highmem.h>

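/*
 * Point update of a single L1 (PTE) entry via the mmu_update hypercall.
 * ptep_to_machine() rather than virt_to_machine() is used so the update
 * also works when the PTE page is mapped through kmap_atomic() (highpte).
 */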
void xen_l1_entry_update(pte_t *ptr, pte_t val)
{
	mmu_update_t u;
	u.ptr = ptep_to_machine(ptr);
	u.val = __pte_val(val);
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
EXPORT_SYMBOL_GPL(xen_l1_entry_update);

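/*
 * Corresponding point updates for L2 (PMD), L3 (PUD) and L4 (PGD) entries.
 * Page directory pages are always allocated from lowmem, so
 * virt_to_machine() is sufficient here.
 */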
void xen_l2_entry_update(pmd_t *ptr, pmd_t val)
{
	mmu_update_t u;
	u.ptr = virt_to_machine(ptr);
	u.val = __pmd_val(val);
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}

#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
void xen_l3_entry_update(pud_t *ptr, pud_t val)
{
	mmu_update_t u;
	u.ptr = virt_to_machine(ptr);
	u.val = __pud_val(val);
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
#endif

#ifdef CONFIG_X86_64
void xen_l4_entry_update(pgd_t *ptr, pgd_t val)
{
	mmu_update_t u;
	u.ptr = virt_to_machine(ptr);
	u.val = __pgd_val(val);
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
#endif /* CONFIG_X86_64 */

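/* Switch the current vCPU to a new base page table (the PV analogue of
 * loading CR3).  'ptr' is the physical address of the new top-level table. */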
void xen_pt_switch(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_NEW_BASEPTR;
	op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

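/* Install a new user-mode base page table (MMUEXT_NEW_USER_BASEPTR); used on
 * x86-64, where kernel and user mode run on separate top-level tables. */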
void xen_new_user_pt(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_NEW_USER_BASEPTR;
	op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

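/* Flush the entire TLB of the local vCPU. */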
void xen_tlb_flush(void)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
EXPORT_SYMBOL(xen_tlb_flush);

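/* Invalidate a single linear address in the local vCPU's TLB. */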
void xen_invlpg(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_INVLPG_LOCAL;
	op.arg1.linear_addr = ptr & PAGE_MASK;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
EXPORT_SYMBOL(xen_invlpg);

#ifdef CONFIG_SMP

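/* Cross-vCPU TLB maintenance: flush or invalidate on all vCPUs, or only on
 * the vCPUs named in a cpumask. */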
void xen_tlb_flush_all(void)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_TLB_FLUSH_ALL;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_tlb_flush_mask(cpumask_t *mask)
{
	struct mmuext_op op;
	if ( cpus_empty(*mask) )
		return;
	op.cmd = MMUEXT_TLB_FLUSH_MULTI;
	set_xen_guest_handle(op.arg2.vcpumask, mask->bits);
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_invlpg_all(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_INVLPG_ALL;
	op.arg1.linear_addr = ptr & PAGE_MASK;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_invlpg_mask(cpumask_t *mask, unsigned long ptr)
{
	struct mmuext_op op;
	if ( cpus_empty(*mask) )
		return;
	op.cmd = MMUEXT_INVLPG_MULTI;
	op.arg1.linear_addr = ptr & PAGE_MASK;
	set_xen_guest_handle(op.arg2.vcpumask, mask->bits);
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

#endif /* CONFIG_SMP */

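/*
 * Pin a top-level page table: Xen validates the page-table tree rooted at
 * this frame as an L2/L3/L4 table (depending on the paging mode) and keeps
 * it type-locked until it is released again with xen_pgd_unpin().
 */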
void xen_pgd_pin(unsigned long ptr)
{
	struct mmuext_op op;
#ifdef CONFIG_X86_64
	op.cmd = MMUEXT_PIN_L4_TABLE;
#elif defined(CONFIG_X86_PAE)
	op.cmd = MMUEXT_PIN_L3_TABLE;
#else
	op.cmd = MMUEXT_PIN_L2_TABLE;
#endif
	op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_pgd_unpin(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_UNPIN_TABLE;
	op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

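/* Point the current vCPU's LDT at 'ptr' (a linear address) with 'ents'
 * descriptor entries. */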
void xen_set_ldt(const void *ptr, unsigned int ents)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_SET_LDT;
	op.arg1.linear_addr = (unsigned long)ptr;
	op.arg2.nr_ents = ents;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

/* Protected by balloon_lock. */
#define MAX_CONTIG_ORDER 9 /* 2MB */
static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
static unsigned long limited_frames[1<<MAX_CONTIG_ORDER];
static multicall_entry_t cr_mcl[1<<MAX_CONTIG_ORDER];

/* Ensure multi-page extents are contiguous in machine memory. */
int xen_create_contiguous_region(
	unsigned long vstart, unsigned int order, unsigned int address_bits)
{
	unsigned long *in_frames = discontig_frames, out_frame;
	unsigned long frame, flags;
	unsigned int i;
	int rc, success;
	struct xen_memory_exchange exchange = {
		.in = {
			.nr_extents = 1UL << order,
			.extent_order = 0,
			.domid = DOMID_SELF
		},
		.out = {
			.nr_extents = 1,
			.extent_order = order,
			.address_bits = address_bits,
			.domid = DOMID_SELF
		}
	};

	/*
	 * Currently an auto-translated guest will not perform I/O, nor will
	 * it require PAE page directories below 4GB. Therefore any calls to
	 * this function are redundant and can be ignored.
	 */
	if (xen_feature(XENFEAT_auto_translated_physmap))
		return 0;

	if (unlikely(order > MAX_CONTIG_ORDER))
		return -ENOMEM;

	set_xen_guest_handle(exchange.in.extent_start, in_frames);
	set_xen_guest_handle(exchange.out.extent_start, &out_frame);

	scrub_pages((void *)vstart, 1 << order);

	balloon_lock(flags);

	/* 1. Zap current PTEs, remembering MFNs. */
	for (i = 0; i < (1U<<order); i++) {
		in_frames[i] = pfn_to_mfn((__pa(vstart) >> PAGE_SHIFT) + i);
		MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
					__pte_ma(0), 0);
		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
				    INVALID_P2M_ENTRY);
	}
	if (HYPERVISOR_multicall_check(cr_mcl, i, NULL))
		BUG();

	/* 2. Get a new contiguous memory extent. */
	out_frame = __pa(vstart) >> PAGE_SHIFT;
	rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
	success = (exchange.nr_exchanged == (1UL << order));
	BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
	BUG_ON(success && (rc != 0));
#if CONFIG_XEN_COMPAT <= 0x030002
	if (unlikely(rc == -ENOSYS)) {
		/* Compatibility when XENMEM_exchange is unsupported. */
		if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					 &exchange.in) != (1UL << order))
			BUG();
		success = (HYPERVISOR_memory_op(XENMEM_populate_physmap,
						&exchange.out) == 1);
		if (!success) {
			/* Couldn't get special memory: fall back to normal. */
			for (i = 0; i < (1U<<order); i++)
				in_frames[i] = (__pa(vstart)>>PAGE_SHIFT) + i;
			if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
						 &exchange.in) != (1UL<<order))
				BUG();
		}
	}
#endif

	/* 3. Map the new extent in place of old pages. */
	for (i = 0; i < (1U<<order); i++) {
		frame = success ? (out_frame + i) : in_frames[i];
		MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
					pfn_pte_ma(frame, PAGE_KERNEL), 0);
		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
	}
	cr_mcl[i - 1].args[MULTI_UVMFLAGS_INDEX] = order
		? UVMF_TLB_FLUSH|UVMF_ALL
		: UVMF_INVLPG|UVMF_ALL;
	if (HYPERVISOR_multicall_check(cr_mcl, i, NULL))
		BUG();

	balloon_unlock(flags);

	return success ? 0 : -ENOMEM;
}
EXPORT_SYMBOL_GPL(xen_create_contiguous_region);

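/* Undo xen_create_contiguous_region(): exchange the machine-contiguous
 * extent back for 2^order individual frames and restore the mappings and
 * the P2M entries. */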
void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
{
	unsigned long *out_frames = discontig_frames, in_frame;
	unsigned long frame, flags;
	unsigned int i;
	int rc, success;
	struct xen_memory_exchange exchange = {
		.in = {
			.nr_extents = 1,
			.extent_order = order,
			.domid = DOMID_SELF
		},
		.out = {
			.nr_extents = 1UL << order,
			.extent_order = 0,
			.domid = DOMID_SELF
		}
	};

	if (xen_feature(XENFEAT_auto_translated_physmap))
		return;

	if (unlikely(order > MAX_CONTIG_ORDER))
		return;

	set_xen_guest_handle(exchange.in.extent_start, &in_frame);
	set_xen_guest_handle(exchange.out.extent_start, out_frames);

	scrub_pages((void *)vstart, 1 << order);

	balloon_lock(flags);

	/* 1. Find start MFN of contiguous extent. */
	in_frame = pfn_to_mfn(__pa(vstart) >> PAGE_SHIFT);

	/* 2. Zap current PTEs. */
	for (i = 0; i < (1U<<order); i++) {
		MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
					__pte_ma(0), 0);
		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
				    INVALID_P2M_ENTRY);
		out_frames[i] = (__pa(vstart) >> PAGE_SHIFT) + i;
	}
	if (HYPERVISOR_multicall_check(cr_mcl, i, NULL))
		BUG();

	/* 3. Do the exchange for non-contiguous MFNs. */
	rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
	success = (exchange.nr_exchanged == 1);
	BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
	BUG_ON(success && (rc != 0));
#if CONFIG_XEN_COMPAT <= 0x030002
	if (unlikely(rc == -ENOSYS)) {
		/* Compatibility when XENMEM_exchange is unsupported. */
		if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					 &exchange.in) != 1)
			BUG();
		if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
					 &exchange.out) != (1UL << order))
			BUG();
		success = 1;
	}
#endif

	/* 4. Map new pages in place of old pages. */
	for (i = 0; i < (1U<<order); i++) {
		frame = success ? out_frames[i] : (in_frame + i);
		MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
					pfn_pte_ma(frame, PAGE_KERNEL), 0);
		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
	}
	cr_mcl[i - 1].args[MULTI_UVMFLAGS_INDEX] = order
		? UVMF_TLB_FLUSH|UVMF_ALL
		: UVMF_INVLPG|UVMF_ALL;
	if (HYPERVISOR_multicall_check(cr_mcl, i, NULL))
		BUG();

	balloon_unlock(flags);
}
EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);

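/* PageForeign destructor installed by xen_limit_pages_to_max_mfn(): lift the
 * MFN restriction again and hand the pages back to the page allocator. */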
static void undo_limit_pages(struct page *pages, unsigned int order)
{
	BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
	BUG_ON(order > MAX_CONTIG_ORDER);
	xen_limit_pages_to_max_mfn(pages, order, 0);
	ClearPageForeign(pages);
	__free_pages(pages, order);
}

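/*
 * Ensure every page in the 2^order block has a machine frame addressable
 * with at most 'address_bits' bits (e.g. for devices with DMA addressing
 * limits), exchanging offending frames with Xen.  With address_bits == 0 the
 * restriction recorded earlier in pages[1..].index is lifted instead.
 */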
int xen_limit_pages_to_max_mfn(
	struct page *pages, unsigned int order, unsigned int address_bits)
{
	unsigned long flags, frame;
	unsigned long *in_frames = discontig_frames, *out_frames = limited_frames;
	struct page *page;
	unsigned int i, n, nr_mcl;
	int rc, success;
	DECLARE_BITMAP(limit_map, 1 << MAX_CONTIG_ORDER);

	struct xen_memory_exchange exchange = {
		.in = {
			.extent_order = 0,
			.domid = DOMID_SELF
		},
		.out = {
			.extent_order = 0,
			.address_bits = address_bits,
			.domid = DOMID_SELF
		}
	};

	if (xen_feature(XENFEAT_auto_translated_physmap))
		return 0;

	if (unlikely(order > MAX_CONTIG_ORDER))
		return -ENOMEM;

	if (address_bits) {
		if (address_bits < PAGE_SHIFT)
			return -EINVAL;
		bitmap_zero(limit_map, 1U << order);
	} else if (order) {
		BUILD_BUG_ON(sizeof(pages->index) != sizeof(*limit_map));
		for (i = 0; i < BITS_TO_LONGS(1U << order); ++i)
			limit_map[i] = pages[i + 1].index;
	} else
		__set_bit(0, limit_map);

	set_xen_guest_handle(exchange.in.extent_start, in_frames);
	set_xen_guest_handle(exchange.out.extent_start, out_frames);

	/* 0. Scrub the pages. */
	for (i = 0, n = 0; i < 1U<<order; i++) {
		page = &pages[i];
		if (address_bits) {
			if (!(pfn_to_mfn(page_to_pfn(page)) >> (address_bits - PAGE_SHIFT)))
				continue;
			__set_bit(i, limit_map);
		}

		if (!PageHighMem(page))
			scrub_pages(page_address(page), 1);
#ifdef CONFIG_XEN_SCRUB_PAGES
		else {
			scrub_pages(kmap(page), 1);
			kunmap(page);
			++n;
		}
#endif
	}
	if (bitmap_empty(limit_map, 1U << order))
		return 0;

	if (n)
		kmap_flush_unused();

	balloon_lock(flags);

	/* 1. Zap current PTEs (if any), remembering MFNs. */
	for (i = 0, n = 0, nr_mcl = 0; i < (1U<<order); i++) {
		if (!test_bit(i, limit_map))
			continue;
		page = &pages[i];

		out_frames[n] = page_to_pfn(page);
		in_frames[n] = pfn_to_mfn(out_frames[n]);

		if (!PageHighMem(page))
			MULTI_update_va_mapping(cr_mcl + nr_mcl++,
				(unsigned long)page_address(page),
				__pte_ma(0), 0);

		set_phys_to_machine(out_frames[n], INVALID_P2M_ENTRY);
		++n;
	}
	if (nr_mcl && HYPERVISOR_multicall_check(cr_mcl, nr_mcl, NULL))
		BUG();

	/* 2. Get new memory below the required limit. */
	exchange.in.nr_extents = n;
	exchange.out.nr_extents = n;
	rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
	success = (exchange.nr_exchanged == n);
	BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
	BUG_ON(success && (rc != 0));
#if CONFIG_XEN_COMPAT <= 0x030002
	if (unlikely(rc == -ENOSYS)) {
		/* Compatibility when XENMEM_exchange is unsupported. */
		if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					 &exchange.in) != n)
			BUG();
		if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
					 &exchange.out) != n)
			BUG();
		success = 1;
	}
#endif

	/* 3. Map the new pages in place of old pages. */
	for (i = 0, n = 0, nr_mcl = 0; i < (1U<<order); i++) {
		if (!test_bit(i, limit_map))
			continue;
		page = &pages[i];

		frame = success ? out_frames[n] : in_frames[n];

		if (!PageHighMem(page))
			MULTI_update_va_mapping(cr_mcl + nr_mcl++,
				(unsigned long)page_address(page),
				pfn_pte_ma(frame, PAGE_KERNEL), 0);

		set_phys_to_machine(page_to_pfn(page), frame);
		++n;
	}
	if (nr_mcl) {
		cr_mcl[nr_mcl - 1].args[MULTI_UVMFLAGS_INDEX] = order
			? UVMF_TLB_FLUSH|UVMF_ALL
			: UVMF_INVLPG|UVMF_ALL;
		if (HYPERVISOR_multicall_check(cr_mcl, nr_mcl, NULL))
			BUG();
	}

	balloon_unlock(flags);

	if (!success)
		return -ENOMEM;

	if (address_bits) {
		if (order) {
			BUILD_BUG_ON(sizeof(*limit_map) != sizeof(pages->index));
			for (i = 0; i < BITS_TO_LONGS(1U << order); ++i)
				pages[i + 1].index = limit_map[i];
		}
		SetPageForeign(pages, undo_limit_pages);
	}

	return 0;
}
EXPORT_SYMBOL_GPL(xen_limit_pages_to_max_mfn);

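/* LDT pages are mapped read-only under Xen, so individual descriptor writes
 * must go through the update_descriptor hypercall. */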
#ifdef __i386__
int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b)
{
	__u32 *lp = (__u32 *)((char *)ldt + entry * 8);
	maddr_t mach_lp = arbitrary_virt_to_machine(lp);
	return HYPERVISOR_update_descriptor(
		mach_lp, (u64)entry_a | ((u64)entry_b<<32));
}
#endif

#define MAX_BATCHED_FULL_PTES 32

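/*
 * Batch helper for the mprotect() path: when Xen can preserve accessed/dirty
 * bits across page-table updates (XENFEAT_mmu_pt_update_preserve_ad), rewrite
 * all present PTEs in [addr, end) with the new protection via mmu_update, in
 * batches of MAX_BATCHED_FULL_PTES.  Returns non-zero if the range was
 * handled here, zero if the caller must fall back to the generic code.
 */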
int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
			 unsigned long addr, unsigned long end, pgprot_t newprot)
{
	int rc = 0, i = 0;
	mmu_update_t u[MAX_BATCHED_FULL_PTES];
	pte_t *pte;
	spinlock_t *ptl;

	if (!xen_feature(XENFEAT_mmu_pt_update_preserve_ad))
		return 0;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	do {
		if (pte_present(*pte)) {
			u[i].ptr = (__pmd_val(*pmd) & PHYSICAL_PAGE_MASK)
				   | ((unsigned long)pte & ~PAGE_MASK)
				   | MMU_PT_UPDATE_PRESERVE_AD;
			u[i].val = __pte_val(pte_modify(*pte, newprot));
			if (++i == MAX_BATCHED_FULL_PTES) {
				if ((rc = HYPERVISOR_mmu_update(
					&u[0], i, NULL, DOMID_SELF)) != 0)
					break;
				i = 0;
			}
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
	if (i)
		rc = HYPERVISOR_mmu_update(&u[0], i, NULL, DOMID_SELF);
	pte_unmap_unlock(pte - 1, ptl);
	BUG_ON(rc && rc != -ENOSYS);
	return !rc;
}