direct-io.hg

linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c @ 10353:bd1a0b2bb2d4

[LINUX] Only destroy a machine-contiguous memory region if
it really is contiguous (e.g., create_contiguous did not fail).
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Thu Jun 15 11:35:23 2006 +0100 (2006-06-15)
parents ac5e98c1c466
children 2ac74e1df3d7
/******************************************************************************
 * mm/hypervisor.c
 *
 * Update page tables via the hypervisor.
 *
 * Copyright (c) 2002-2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/config.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/hypervisor.h>
#include <xen/balloon.h>
#include <xen/features.h>
#include <xen/interface/memory.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <asm/tlbflush.h>
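
/*
 * Raw accessors for the machine-format value of a page-directory entry.
 * Which structure field holds that value depends on how many paging
 * levels are folded together on this configuration (x86-64, PAE i386,
 * or two-level i386).
 */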

#ifdef CONFIG_X86_64
#define pmd_val_ma(v) (v).pmd
#else
#ifdef CONFIG_X86_PAE
# define pmd_val_ma(v) ((v).pmd)
# define pud_val_ma(v) ((v).pgd.pgd)
#else
# define pmd_val_ma(v) ((v).pud.pgd.pgd)
#endif
#endif

void xen_l1_entry_update(pte_t *ptr, pte_t val)
{
        mmu_update_t u;
        u.ptr = virt_to_machine(ptr);
        u.val = pte_val_ma(val);
        BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}

void xen_l2_entry_update(pmd_t *ptr, pmd_t val)
{
        mmu_update_t u;
        u.ptr = virt_to_machine(ptr);
        u.val = pmd_val_ma(val);
        BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}

#ifdef CONFIG_X86_PAE
void xen_l3_entry_update(pud_t *ptr, pud_t val)
{
        mmu_update_t u;
        u.ptr = virt_to_machine(ptr);
        u.val = pud_val_ma(val);
        BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
#endif

#ifdef CONFIG_X86_64
void xen_l3_entry_update(pud_t *ptr, pud_t val)
{
        mmu_update_t u;
        u.ptr = virt_to_machine(ptr);
        u.val = val.pud;
        BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}

void xen_l4_entry_update(pgd_t *ptr, pgd_t val)
{
        mmu_update_t u;
        u.ptr = virt_to_machine(ptr);
        u.val = val.pgd;
        BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
#endif /* CONFIG_X86_64 */
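
/*
 * Keep Xen's machine-to-physical (M2P) table in sync when this guest
 * changes which pseudo-physical frame a machine frame backs. For
 * auto-translated guests pfn == mfn and the hypervisor maintains the
 * mapping itself, so there is nothing to do.
 */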

void xen_machphys_update(unsigned long mfn, unsigned long pfn)
{
        mmu_update_t u;
        if (xen_feature(XENFEAT_auto_translated_physmap)) {
                BUG_ON(pfn != mfn);
                return;
        }
        u.ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
        u.val = pfn;
        BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}

void xen_pt_switch(unsigned long ptr)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_NEW_BASEPTR;
        op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_new_user_pt(unsigned long ptr)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_NEW_USER_BASEPTR;
        op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_tlb_flush(void)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
EXPORT_SYMBOL(xen_tlb_flush);

void xen_invlpg(unsigned long ptr)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_INVLPG_LOCAL;
        op.arg1.linear_addr = ptr & PAGE_MASK;
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
EXPORT_SYMBOL(xen_invlpg);
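
/*
 * Multi-vCPU variants: flush every vCPU of the domain, or only those
 * named in *mask, which is handed to Xen as a vCPU bitmap.
 */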

#ifdef CONFIG_SMP

void xen_tlb_flush_all(void)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_TLB_FLUSH_ALL;
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_tlb_flush_mask(cpumask_t *mask)
{
        struct mmuext_op op;
        if ( cpus_empty(*mask) )
                return;
        op.cmd = MMUEXT_TLB_FLUSH_MULTI;
        op.arg2.vcpumask = mask->bits;
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_invlpg_all(unsigned long ptr)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_INVLPG_ALL;
        op.arg1.linear_addr = ptr & PAGE_MASK;
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_invlpg_mask(cpumask_t *mask, unsigned long ptr)
{
        struct mmuext_op op;
        if ( cpus_empty(*mask) )
                return;
        op.cmd = MMUEXT_INVLPG_MULTI;
        op.arg1.linear_addr = ptr & PAGE_MASK;
        op.arg2.vcpumask = mask->bits;
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

#endif /* CONFIG_SMP */
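
/*
 * Pinning asks Xen to validate a top-level page table and take a type
 * reference on it, so it can later be installed via MMUEXT_NEW_BASEPTR
 * without re-validation; unpinning drops that reference again.
 */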

void xen_pgd_pin(unsigned long ptr)
{
        struct mmuext_op op;
#ifdef CONFIG_X86_64
        op.cmd = MMUEXT_PIN_L4_TABLE;
#elif defined(CONFIG_X86_PAE)
        op.cmd = MMUEXT_PIN_L3_TABLE;
#else
        op.cmd = MMUEXT_PIN_L2_TABLE;
#endif
        op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_pgd_unpin(unsigned long ptr)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_UNPIN_TABLE;
        op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_set_ldt(unsigned long ptr, unsigned long len)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_SET_LDT;
        op.arg1.linear_addr = ptr;
        op.arg2.nr_ents = len;
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

/*
 * Bitmap is indexed by page number. If bit is set, the page is part of a
 * xen_create_contiguous_region() area of memory.
 */
unsigned long *contiguous_bitmap;
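
/*
 * Set or clear a run of bits in contiguous_bitmap. Within a single word,
 * -(1UL<<start_off) keeps the bits at positions >= start_off and
 * ((1UL<<end_off)-1) keeps those below end_off; runs spanning several
 * words are handled one word at a time.
 */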

static void contiguous_bitmap_set(
        unsigned long first_page, unsigned long nr_pages)
{
        unsigned long start_off, end_off, curr_idx, end_idx;

        curr_idx = first_page / BITS_PER_LONG;
        start_off = first_page & (BITS_PER_LONG-1);
        end_idx = (first_page + nr_pages) / BITS_PER_LONG;
        end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);

        if (curr_idx == end_idx) {
                contiguous_bitmap[curr_idx] |=
                        ((1UL<<end_off)-1) & -(1UL<<start_off);
        } else {
                contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
                while ( ++curr_idx < end_idx )
                        contiguous_bitmap[curr_idx] = ~0UL;
                contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
        }
}

static void contiguous_bitmap_clear(
        unsigned long first_page, unsigned long nr_pages)
{
        unsigned long start_off, end_off, curr_idx, end_idx;

        curr_idx = first_page / BITS_PER_LONG;
        start_off = first_page & (BITS_PER_LONG-1);
        end_idx = (first_page + nr_pages) / BITS_PER_LONG;
        end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);

        if (curr_idx == end_idx) {
                contiguous_bitmap[curr_idx] &=
                        -(1UL<<end_off) | ((1UL<<start_off)-1);
        } else {
                contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
                while ( ++curr_idx != end_idx )
                        contiguous_bitmap[curr_idx] = 0;
                contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
        }
}
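
/*
 * xen_create_contiguous_region() below trades the 2^order pseudo-physical
 * pages backing [vstart, vstart + (PAGE_SIZE << order)) for one machine-
 * contiguous extent allocated by Xen below 2^address_bits. On failure the
 * range is re-populated with ordinary order-0 pages and -ENOMEM is
 * returned, so the virtual range stays usable but is not contiguous.
 */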

/* Ensure multi-page extents are contiguous in machine memory. */
int xen_create_contiguous_region(
        unsigned long vstart, unsigned int order, unsigned int address_bits)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        unsigned long frame, i, flags;
        struct xen_memory_reservation reservation = {
                .nr_extents   = 1,
                .extent_order = 0,
                .domid        = DOMID_SELF
        };
        set_xen_guest_handle(reservation.extent_start, &frame);

        /*
         * Currently an auto-translated guest will not perform I/O, nor will
         * it require PAE page directories below 4GB. Therefore any calls to
         * this function are redundant and can be ignored.
         */
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return 0;

        scrub_pages(vstart, 1 << order);

        balloon_lock(flags);

        /* 1. Zap current PTEs, giving away the underlying pages. */
        for (i = 0; i < (1<<order); i++) {
                pgd = pgd_offset_k(vstart + (i*PAGE_SIZE));
                pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE)));
                pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE)));
                pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
                frame = pte_mfn(*pte);
                BUG_ON(HYPERVISOR_update_va_mapping(
                        vstart + (i*PAGE_SIZE), __pte_ma(0), 0));
                set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
                        INVALID_P2M_ENTRY);
                BUG_ON(HYPERVISOR_memory_op(
                        XENMEM_decrease_reservation, &reservation) != 1);
        }

        /* 2. Get a new contiguous memory extent. */
        reservation.extent_order = order;
        reservation.address_bits = address_bits;
        frame = __pa(vstart) >> PAGE_SHIFT;
        if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
                                 &reservation) != 1)
                goto fail;

        /* 3. Map the new extent in place of old pages. */
        for (i = 0; i < (1<<order); i++) {
                BUG_ON(HYPERVISOR_update_va_mapping(
                        vstart + (i*PAGE_SIZE),
                        pfn_pte_ma(frame+i, PAGE_KERNEL), 0));
                set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame+i);
        }

        flush_tlb_all();

        contiguous_bitmap_set(__pa(vstart) >> PAGE_SHIFT, 1UL << order);

        balloon_unlock(flags);

        return 0;

 fail:
        reservation.extent_order = 0;
        reservation.address_bits = 0;

        for (i = 0; i < (1<<order); i++) {
                frame = (__pa(vstart) >> PAGE_SHIFT) + i;
                BUG_ON(HYPERVISOR_memory_op(
                        XENMEM_populate_physmap, &reservation) != 1);
                BUG_ON(HYPERVISOR_update_va_mapping(
                        vstart + (i*PAGE_SIZE),
                        pfn_pte_ma(frame, PAGE_KERNEL), 0));
                set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
        }

        flush_tlb_all();

        balloon_unlock(flags);

        return -ENOMEM;
}
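
/*
 * Undo xen_create_contiguous_region(). As per the changeset comment above,
 * the region is only torn down if its first page is marked in
 * contiguous_bitmap, i.e. if creation actually succeeded; otherwise the
 * call is a no-op.
 */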

void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        unsigned long frame, i, flags;
        struct xen_memory_reservation reservation = {
                .nr_extents   = 1,
                .extent_order = 0,
                .domid        = DOMID_SELF
        };
        set_xen_guest_handle(reservation.extent_start, &frame);

        if (xen_feature(XENFEAT_auto_translated_physmap) ||
            !test_bit(__pa(vstart) >> PAGE_SHIFT, contiguous_bitmap))
                return;

        scrub_pages(vstart, 1 << order);

        balloon_lock(flags);

        contiguous_bitmap_clear(__pa(vstart) >> PAGE_SHIFT, 1UL << order);

        /* 1. Zap current PTEs, giving away the underlying pages. */
        for (i = 0; i < (1<<order); i++) {
                pgd = pgd_offset_k(vstart + (i*PAGE_SIZE));
                pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE)));
                pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE)));
                pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
                frame = pte_mfn(*pte);
                BUG_ON(HYPERVISOR_update_va_mapping(
                        vstart + (i*PAGE_SIZE), __pte_ma(0), 0));
                set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
                        INVALID_P2M_ENTRY);
                BUG_ON(HYPERVISOR_memory_op(
                        XENMEM_decrease_reservation, &reservation) != 1);
        }

        /* 2. Map new pages in place of old pages. */
        for (i = 0; i < (1<<order); i++) {
                frame = (__pa(vstart) >> PAGE_SHIFT) + i;
                BUG_ON(HYPERVISOR_memory_op(
                        XENMEM_populate_physmap, &reservation) != 1);
                BUG_ON(HYPERVISOR_update_va_mapping(
                        vstart + (i*PAGE_SIZE),
                        pfn_pte_ma(frame, PAGE_KERNEL), 0));
                set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
        }

        flush_tlb_all();

        balloon_unlock(flags);
}

#ifdef __i386__
int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b)
{
        __u32 *lp = (__u32 *)((char *)ldt + entry * 8);
        maddr_t mach_lp = arbitrary_virt_to_machine(lp);
        return HYPERVISOR_update_descriptor(
                mach_lp, (u64)entry_a | ((u64)entry_b<<32));
}
#endif
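
As a rough usage sketch (not part of this file): a driver in this tree that needs a machine-contiguous, 32-bit-addressable buffer for device DMA might pair __get_free_pages() with the two region calls defined above. The helper names alloc_dma_contiguous() and free_dma_contiguous() below are hypothetical; only xen_create_contiguous_region() and xen_destroy_contiguous_region() come from this file.

#include <linux/gfp.h>
#include <asm/hypervisor.h>

/* Hypothetical helper: allocate 2^order pages and exchange them for a
 * machine-contiguous extent below 4GB (address_bits = 32). */
static void *alloc_dma_contiguous(unsigned int order)
{
        unsigned long vstart = __get_free_pages(GFP_KERNEL, order);

        if (vstart == 0)
                return NULL;

        if (xen_create_contiguous_region(vstart, order, 32) != 0) {
                free_pages(vstart, order);
                return NULL;
        }

        return (void *)vstart;
}

/* Hypothetical counterpart: give the contiguous extent back to Xen and
 * free the pages. xen_destroy_contiguous_region() checks contiguous_bitmap,
 * so it is a no-op for a region that was never made contiguous. */
static void free_dma_contiguous(void *addr, unsigned int order)
{
        xen_destroy_contiguous_region((unsigned long)addr, order);
        free_pages((unsigned long)addr, order);
}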