direct-io.hg: view xen/arch/x86/x86_64/mm.c @ 14134:9e5e94942045

x86: Clean up {alloc,free}_xen_pagetable() interface to avoid use of
frame_table variable before initialisation. This wasn't a bug, but was
confusing.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Mon Feb 26 16:25:17 2007 +0000 (2007-02-26)
parents 1e5a83fb928b
children 405573aedd24

/******************************************************************************
 * arch/x86/x86_64/mm.c
 *
 * Modifications to Linux original are copyright (c) 2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc., 59
 * Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/config.h>
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/sched.h>
#include <xen/guest_access.h>
#include <asm/current.h>
#include <asm/asm_defns.h>
#include <asm/page.h>
#include <asm/flushtlb.h>
#include <asm/fixmap.h>
#include <asm/hypercall.h>
#include <asm/msr.h>
#include <public/memory.h>

#ifdef CONFIG_COMPAT
unsigned int m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START;
#endif

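/*
 * Allocate a page for use as a Xen page table. During early boot the domain
 * heap is not yet available, so the page comes from the low 1GB boot
 * allocator instead. In both cases the caller receives a virtual address,
 * so it never needs to reference frame_table directly.
 */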
void *alloc_xen_pagetable(void)
{
    extern int early_boot;
    unsigned long mfn;

    if ( !early_boot )
    {
        struct page_info *pg = alloc_domheap_page(NULL);
        BUG_ON(pg == NULL);
        return page_to_virt(pg);
    }

    /* Early pagetables must come from low 1GB of memory. */
    mfn = alloc_boot_low_pages(1, 1); /* 0x0 - 0x40000000 */
    BUG_ON(mfn == 0);
    return mfn_to_virt(mfn);
}

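/* Free a page table page previously returned by alloc_xen_pagetable(). */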
void free_xen_pagetable(void *v)
{
    free_domheap_page(virt_to_page(v));
}

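/*
 * Return a pointer to the L2 entry in the idle page tables that maps the Xen
 * virtual address v, allocating and installing intermediate L3/L2 tables on
 * demand.
 */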
l2_pgentry_t *virt_to_xen_l2e(unsigned long v)
{
    l4_pgentry_t *pl4e;
    l3_pgentry_t *pl3e;
    l2_pgentry_t *pl2e;

    pl4e = &idle_pg_table[l4_table_offset(v)];
    if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) )
    {
        pl3e = alloc_xen_pagetable();
        clear_page(pl3e);
        l4e_write(pl4e, l4e_from_paddr(__pa(pl3e), __PAGE_HYPERVISOR));
    }

    pl3e = l4e_to_l3e(*pl4e) + l3_table_offset(v);
    if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
    {
        pl2e = alloc_xen_pagetable();
        clear_page(pl2e);
        l3e_write(pl3e, l3e_from_paddr(__pa(pl2e), __PAGE_HYPERVISOR));
    }

    pl2e = l3e_to_l2e(*pl3e) + l2_table_offset(v);
    return pl2e;
}

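/*
 * Build the machine-to-phys (M2P) table: a read/write mapping for Xen at
 * RDWR_MPT_VIRT_START and a read-only, guest-visible alias at
 * RO_MPT_VIRT_START, plus a compatibility-mode variant when CONFIG_COMPAT
 * is enabled. Also installs the linear page table mapping.
 */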
void __init paging_init(void)
{
    unsigned long i, mpt_size, va;
    l3_pgentry_t *l3_ro_mpt;
    l2_pgentry_t *l2_ro_mpt = NULL;
    struct page_info *l1_pg, *l2_pg;

    /* Create user-accessible L2 directory to map the MPT for guests. */
    if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
        goto nomem;
    l3_ro_mpt = clear_page(page_to_virt(l2_pg));
    l4e_write(&idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)],
              l4e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));

    /*
     * Allocate and map the machine-to-phys table.
     * This also ensures L3 is present for fixmaps.
     */
    mpt_size = (max_page * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1;
    mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
    for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
    {
        if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL )
            goto nomem;
        map_pages_to_xen(
            RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
            page_to_mfn(l1_pg),
            1UL << PAGETABLE_ORDER,
            PAGE_HYPERVISOR);
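        /* Pre-fill the new M2P range with a 0x55 pattern so entries not yet
         * written by the populate code are recognisably bogus. */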
        memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)), 0x55,
               1UL << L2_PAGETABLE_SHIFT);
        if ( !((unsigned long)l2_ro_mpt & ~PAGE_MASK) )
        {
            if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
                goto nomem;
            va = RO_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT);
            l2_ro_mpt = clear_page(page_to_virt(l2_pg));
            l3e_write(&l3_ro_mpt[l3_table_offset(va)],
                      l3e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
            l2_ro_mpt += l2_table_offset(va);
        }
        /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
        l2e_write(l2_ro_mpt, l2e_from_page(
            l1_pg, /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
        l2_ro_mpt++;
    }

#ifdef CONFIG_COMPAT
    if ( !compat_disabled )
    {
        /* Create user-accessible L2 directory to map the MPT for compatibility guests. */
        BUILD_BUG_ON(l4_table_offset(RDWR_MPT_VIRT_START) !=
                     l4_table_offset(HIRO_COMPAT_MPT_VIRT_START));
        l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(HIRO_COMPAT_MPT_VIRT_START)]);
        if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
            goto nomem;
        compat_idle_pg_table_l2 = l2_ro_mpt = clear_page(page_to_virt(l2_pg));
        l3e_write(&l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
                  l3e_from_page(l2_pg, __PAGE_HYPERVISOR));
        l2_ro_mpt += l2_table_offset(HIRO_COMPAT_MPT_VIRT_START);
        /*
         * Allocate and map the compatibility mode machine-to-phys table.
         */
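        /*
         * Compatibility guests use 4-byte M2P entries, so the table needs
         * only half the space of the native one. Clamp it to the compat
         * address window, and raise m2p_compat_vstart so the table ends at
         * MACH2PHYS_COMPAT_VIRT_END.
         */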
        mpt_size = (mpt_size >> 1) + (1UL << (L2_PAGETABLE_SHIFT - 1));
        if ( mpt_size > RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START )
            mpt_size = RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START;
        mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
        if ( m2p_compat_vstart + mpt_size < MACH2PHYS_COMPAT_VIRT_END )
            m2p_compat_vstart = MACH2PHYS_COMPAT_VIRT_END - mpt_size;
        for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
        {
            if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL )
                goto nomem;
            map_pages_to_xen(
                RDWR_COMPAT_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
                page_to_mfn(l1_pg),
                1UL << PAGETABLE_ORDER,
                PAGE_HYPERVISOR);
            memset((void *)(RDWR_COMPAT_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)),
                   0x55,
                   1UL << L2_PAGETABLE_SHIFT);
            /* NB. Cannot be GLOBAL as the pt entries get copied into per-VM space. */
            l2e_write(l2_ro_mpt, l2e_from_page(l1_pg, _PAGE_PSE|_PAGE_PRESENT));
            l2_ro_mpt++;
        }
    }
#endif

    /* Set up linear page table mapping. */
    l4e_write(&idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)],
              l4e_from_paddr(__pa(idle_pg_table), __PAGE_HYPERVISOR));
    return;

 nomem:
    panic("Not enough memory for m2p table\n");
}

void __init setup_idle_pagetable(void)
{
    /* Install per-domain mappings for idle domain. */
    l4e_write(&idle_pg_table[l4_table_offset(PERDOMAIN_VIRT_START)],
              l4e_from_page(
                  virt_to_page(idle_vcpu[0]->domain->arch.mm_perdomain_l3),
                  __PAGE_HYPERVISOR));
}

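/* Drop the boot-time low (identity) mapping and flush global TLB entries. */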
void __init zap_low_mappings(void)
{
    l4e_write(&idle_pg_table[0], l4e_empty());
    flush_tlb_all_pge();
}

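/*
 * x86_64-specific memory setup: sanity-check the layout of struct page_info
 * and mark the M2P table pages as sharable read-only with privileged domains.
 */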
void subarch_init_memory(void)
{
    unsigned long i, v, m2p_start_mfn;
    l3_pgentry_t l3e;
    l2_pgentry_t l2e;

    /*
     * We are rather picky about the layout of 'struct page_info'. The
     * count_info and domain fields must be adjacent, as we perform atomic
     * 64-bit operations on them.
     */
    BUILD_BUG_ON(offsetof(struct page_info, u.inuse._domain) !=
                 (offsetof(struct page_info, count_info) + sizeof(u32)));
    BUILD_BUG_ON((offsetof(struct page_info, count_info) & 7) != 0);
    BUILD_BUG_ON(sizeof(struct page_info) !=
                 (32 + BITS_TO_LONGS(NR_CPUS)*sizeof(long)));

    /* M2P table is mappable read-only by privileged domains. */
    for ( v = RDWR_MPT_VIRT_START;
          v != RDWR_MPT_VIRT_END;
          v += 1 << L2_PAGETABLE_SHIFT )
    {
        l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
            l3_table_offset(v)];
        if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
            continue;
        l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
        if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
            continue;
        m2p_start_mfn = l2e_get_pfn(l2e);

        for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
        {
            struct page_info *page = mfn_to_page(m2p_start_mfn + i);
            share_xen_page_with_privileged_guests(page, XENSHARE_readonly);
        }
    }
#ifdef CONFIG_COMPAT
    if ( !compat_disabled )
    {
        for ( v = RDWR_COMPAT_MPT_VIRT_START;
              v != RDWR_COMPAT_MPT_VIRT_END;
              v += 1 << L2_PAGETABLE_SHIFT )
        {
            l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
                l3_table_offset(v)];
            if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
                continue;
            l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
            if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
                continue;
            m2p_start_mfn = l2e_get_pfn(l2e);

            for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
            {
                struct page_info *page = mfn_to_page(m2p_start_mfn + i);
                share_xen_page_with_privileged_guests(page, XENSHARE_readonly);
            }
        }
    }
#endif
}

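/*
 * x86_64-specific memory_op hypercalls. XENMEM_machphys_mfn_list returns the
 * machine frame numbers backing the M2P table, one per 2MB superpage, so a
 * privileged guest can map the table.
 */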
long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
{
    struct xen_machphys_mfn_list xmml;
    l3_pgentry_t l3e;
    l2_pgentry_t l2e;
    unsigned long v;
    xen_pfn_t mfn;
    unsigned int i;
    long rc = 0;

    switch ( op )
    {
    case XENMEM_machphys_mfn_list:
        if ( copy_from_guest(&xmml, arg, 1) )
            return -EFAULT;

        for ( i = 0, v = RDWR_MPT_VIRT_START;
              (i != xmml.max_extents) && (v != RDWR_MPT_VIRT_END);
              i++, v += 1 << 21 )
        {
            l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
                l3_table_offset(v)];
            if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
                break;
            l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
            if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
                break;
            mfn = l2e_get_pfn(l2e) + l1_table_offset(v);
            if ( copy_to_guest_offset(xmml.extent_start, i, &mfn, 1) )
                return -EFAULT;
        }

        xmml.nr_extents = i;
        if ( copy_to_guest(arg, &xmml, 1) )
            return -EFAULT;

        break;

    default:
        rc = -ENOSYS;
        break;
    }

    return rc;
}

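/*
 * Record the guest kernel stack (SS:ESP) to be loaded when this vcpu
 * switches from user to kernel mode.
 */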
long do_stack_switch(unsigned long ss, unsigned long esp)
{
    fixup_guest_stack_selector(current->domain, ss);
    current->arch.guest_context.kernel_ss = ss;
    current->arch.guest_context.kernel_sp = esp;
    return 0;
}

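/*
 * Set one of the FS/GS base MSRs (or the user %gs selector) on behalf of the
 * current 64-bit PV guest; in long mode these bases cannot be loaded through
 * a GDT/LDT descriptor.
 */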
long do_set_segment_base(unsigned int which, unsigned long base)
{
    struct vcpu *v = current;
    long ret = 0;

    switch ( which )
    {
    case SEGBASE_FS:
        if ( wrmsr_safe(MSR_FS_BASE, base, base>>32) )
            ret = -EFAULT;
        else
            v->arch.guest_context.fs_base = base;
        break;

    case SEGBASE_GS_USER:
        if ( wrmsr_safe(MSR_SHADOW_GS_BASE, base, base>>32) )
            ret = -EFAULT;
        else
            v->arch.guest_context.gs_base_user = base;
        break;

    case SEGBASE_GS_KERNEL:
        if ( wrmsr_safe(MSR_GS_BASE, base, base>>32) )
            ret = -EFAULT;
        else
            v->arch.guest_context.gs_base_kernel = base;
        break;

    case SEGBASE_GS_USER_SEL:
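        /*
         * Load the user %gs selector: swapgs so the user GS context is
         * active, load the selector (the fixup zeroes it if the load
         * faults), then swap back to Xen's GS.
         */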
        __asm__ __volatile__ (
            "     swapgs              \n"
            "1:   movl %k0,%%gs       \n"
            "    "safe_swapgs"        \n"
            ".section .fixup,\"ax\"   \n"
            "2:   xorl %k0,%k0        \n"
            "     jmp  1b             \n"
            ".previous                \n"
            ".section __ex_table,\"a\"\n"
            "    .align 8             \n"
            "    .quad 1b,2b          \n"
            ".previous                "
            : : "r" (base&0xffff) );
        break;

    default:
        ret = -EINVAL;
        break;
    }

    return ret;
}

/* Returns TRUE if given descriptor is valid for GDT or LDT. */
int check_descriptor(const struct domain *dom, struct desc_struct *d)
{
    u32 a = d->a, b = d->b;
    u16 cs;

    /* A not-present descriptor will always fault, so is safe. */
    if ( !(b & _SEGMENT_P) )
        goto good;

    /* Check and fix up the DPL. */
    if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL(dom) << 13) )
        d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL(dom) << 13);

    /* All code and data segments are okay. No base/limit checking. */
    if ( (b & _SEGMENT_S) )
    {
        if ( !IS_COMPAT(dom) || !(b & _SEGMENT_L) )
            goto good;
        goto bad;
    }

    /* Invalid type 0 is harmless. It is used for 2nd half of a call gate. */
    if ( (b & _SEGMENT_TYPE) == 0x000 )
        goto good;

    /* Everything but a call gate is discarded here. */
    if ( (b & _SEGMENT_TYPE) != 0xc00 )
        goto bad;

    /* Validate and fix up the target code selector. */
    cs = a >> 16;
    fixup_guest_code_selector(dom, cs);
    if ( !guest_gate_selector_okay(dom, cs) )
        goto bad;
    a = d->a = (d->a & 0xffffU) | (cs << 16);

    /* Reserved bits must be zero. */
    if ( (b & 0xe0) != 0 )
        goto bad;

 good:
    return 1;
 bad:
    return 0;
}

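/*
 * Clamp a requested allocation address width (in bits of physical address)
 * to what the domain can actually address.
 */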
unsigned int domain_clamp_alloc_bitsize(struct domain *d, unsigned int bits)
{
    if ( d == NULL )
        return bits;
    return min(d->arch.physaddr_bitsize, bits);
}

#include "compat/mm.c"

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */