view xen/arch/x86/x86_32/mm.c @ 14134:9e5e94942045 (direct-io.hg)

x86: Clean up {alloc,free}_xen_pagetable() interface to avoid use of the
frame_table variable before initialisation. This wasn't a bug, but it was
confusing.
Signed-off-by: Keir Fraser <keir@xensource.com>
author    kfraser@localhost.localdomain
date      Mon Feb 26 16:25:17 2007 +0000
parents   7c5eea5feebd
children  23c4790512db
/******************************************************************************
 * arch/x86/x86_32/mm.c
 *
 * Modifications to Linux original are copyright (c) 2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/config.h>
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/sched.h>
#include <xen/guest_access.h>
#include <asm/current.h>
#include <asm/page.h>
#include <asm/flushtlb.h>
#include <asm/fixmap.h>
#include <public/memory.h>

unsigned int PAGE_HYPERVISOR = __PAGE_HYPERVISOR;
unsigned int PAGE_HYPERVISOR_NOCACHE = __PAGE_HYPERVISOR_NOCACHE;
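
/*
 * Size in bytes of the machine-to-phys (M2P) table, rounded up to an L2
 * superpage boundary; computed in paging_init().
 */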
static unsigned long mpt_size;
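
/*
 * Allocate a page for use as a Xen page table. During early boot, before
 * the heap allocators are available, pages are taken directly from the
 * start of the Xen heap; thereafter they come from alloc_xenheap_page().
 *
 * Illustrative usage when building a new table:
 *     l1_pgentry_t *pl1e = alloc_xen_pagetable();
 *     clear_page(pl1e);
 *     ... populate entries, then hook the page into a higher-level table ...
 */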
void *alloc_xen_pagetable(void)
{
    extern int early_boot;
    extern unsigned long xenheap_phys_start;
    unsigned long mfn;

    if ( !early_boot )
    {
        void *v = alloc_xenheap_page();
        BUG_ON(v == NULL);
        return v;
    }

    mfn = xenheap_phys_start >> PAGE_SHIFT;
    xenheap_phys_start += PAGE_SIZE;
    return mfn_to_virt(mfn);
}

void free_xen_pagetable(void *v)
{
    free_xenheap_page(v);
}
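
/* Return a pointer to the idle page table's L2 entry mapping address v. */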
l2_pgentry_t *virt_to_xen_l2e(unsigned long v)
{
    return &idle_pg_table_l2[l2_linear_offset(v)];
}
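
/*
 * Set up Xen's boot-time mappings: mark hypervisor mappings global when PGE
 * is available, allocate and map the machine-to-phys (M2P) table (plus a
 * read-only alias for guests), and build the page tables backing ioremap().
 */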
void __init paging_init(void)
{
    void *ioremap_pt;
    unsigned long v;
    struct page_info *pg;
    int i;

#ifdef CONFIG_X86_PAE
    printk("PAE enabled, limit: %d GB\n", MACHPHYS_MBYTES);
#else
    printk("PAE disabled.\n");
#endif

    if ( cpu_has_pge )
    {
        /* Suitable Xen mapping can be GLOBAL. */
        set_in_cr4(X86_CR4_PGE);
        PAGE_HYPERVISOR |= _PAGE_GLOBAL;
        PAGE_HYPERVISOR_NOCACHE |= _PAGE_GLOBAL;
        /* Transform early mappings (e.g., the frametable). */
        for ( v = HYPERVISOR_VIRT_START; v; v += (1 << L2_PAGETABLE_SHIFT) )
            if ( (l2e_get_flags(idle_pg_table_l2[l2_linear_offset(v)]) &
                  (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT) )
                l2e_add_flags(idle_pg_table_l2[l2_linear_offset(v)],
                              _PAGE_GLOBAL);
    }

    /*
     * Allocate and map the machine-to-phys table and create read-only mapping
     * of MPT for guest-OS use.
     */
    mpt_size = (max_page * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1;
    mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
    for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
    {
        if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL )
            panic("Not enough memory to bootstrap Xen.\n");
        l2e_write(&idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i],
                  l2e_from_page(pg, PAGE_HYPERVISOR | _PAGE_PSE));
        /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
        l2e_write(&idle_pg_table_l2[l2_linear_offset(RO_MPT_VIRT_START) + i],
                  l2e_from_page(
                      pg, (__PAGE_HYPERVISOR | _PAGE_PSE) & ~_PAGE_RW));
    }

    /* Fill with an obvious debug pattern. */
    for ( i = 0; i < (mpt_size / BYTES_PER_LONG); i++)
        set_gpfn_from_mfn(i, 0x55555555);

    /* Create page tables for ioremap(). */
    for ( i = 0; i < (IOREMAP_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
    {
        ioremap_pt = alloc_xenheap_page();
        clear_page(ioremap_pt);
        l2e_write(&idle_pg_table_l2[l2_linear_offset(IOREMAP_VIRT_START) + i],
                  l2e_from_page(virt_to_page(ioremap_pt), __PAGE_HYPERVISOR));
    }
}
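
/* Map the idle domain's per-domain mapping area into the idle page table. */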
void __init setup_idle_pagetable(void)
{
    int i;

    for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
        l2e_write(&idle_pg_table_l2[l2_linear_offset(PERDOMAIN_VIRT_START)+i],
                  l2e_from_page(virt_to_page(idle_vcpu[0]->domain->
                                             arch.mm_perdomain_pt) + i,
                                __PAGE_HYPERVISOR));
}
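
/*
 * Remove the boot-time identity (virtual == physical) superpage mappings
 * below HYPERVISOR_VIRT_START, then flush all TLB entries, including
 * global ones.
 */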
void __init zap_low_mappings(l2_pgentry_t *base)
{
    int i;
    u32 addr;

    for ( i = 0; ; i++ )
    {
        addr = i << L2_PAGETABLE_SHIFT;
        if ( addr >= HYPERVISOR_VIRT_START )
            break;
        if ( l2e_get_paddr(base[i]) != addr )
            continue;
        l2e_write(&base[i], l2e_empty());
    }

    flush_tlb_all_pge();
}
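
/*
 * x86-32-specific memory initialisation: sanity-check the 'struct page_info'
 * layout, share the M2P table read-only with privileged guests, and drop the
 * DPL of the ring-1 GDT entries when guest kernels run in ring 0.
 */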
void subarch_init_memory(void)
{
    unsigned long m2p_start_mfn;
    unsigned int i, j;

    /*
     * We are rather picky about the layout of 'struct page_info'. The
     * count_info and domain fields must be adjacent, as we perform atomic
     * 64-bit operations on them. Also, just for sanity, we assert the size
     * of the structure here.
     */
    BUILD_BUG_ON(offsetof(struct page_info, u.inuse._domain) !=
                 (offsetof(struct page_info, count_info) + sizeof(u32)));
    BUILD_BUG_ON((offsetof(struct page_info, count_info) & 7) != 0);
    BUILD_BUG_ON(sizeof(struct page_info) != 24);

    /* M2P table is mappable read-only by privileged domains. */
    for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
    {
        m2p_start_mfn = l2e_get_pfn(
            idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i]);
        for ( j = 0; j < L2_PAGETABLE_ENTRIES; j++ )
        {
            struct page_info *page = mfn_to_page(m2p_start_mfn + j);
            share_xen_page_with_privileged_guests(page, XENSHARE_readonly);
        }
    }

    if ( supervisor_mode_kernel )
    {
        /* Guest kernel runs in ring 0, not ring 1. */
        struct desc_struct *d;
        d = &gdt_table[(FLAT_RING1_CS >> 3) - FIRST_RESERVED_GDT_ENTRY];
        d[0].b &= ~_SEGMENT_DPL;
        d[1].b &= ~_SEGMENT_DPL;
    }
}
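
/*
 * x86-32-specific memory hypercalls. XENMEM_machphys_mfn_list returns the
 * MFN backing each 2MB-aligned chunk of the machine-to-phys table.
 */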
long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
{
    struct xen_machphys_mfn_list xmml;
    unsigned long mfn;
    unsigned int i, max;
    long rc = 0;

    switch ( op )
    {
    case XENMEM_machphys_mfn_list:
        if ( copy_from_guest(&xmml, arg, 1) )
            return -EFAULT;

        max = min_t(unsigned int, xmml.max_extents, mpt_size >> 21);

        for ( i = 0; i < max; i++ )
        {
            mfn = l2e_get_pfn(idle_pg_table_l2[l2_linear_offset(
                RDWR_MPT_VIRT_START + (i << 21))]) + l1_table_offset(i << 21);
            if ( copy_to_guest_offset(xmml.extent_start, i, &mfn, 1) )
                return -EFAULT;
        }

        xmml.nr_extents = i;
        if ( copy_to_guest(arg, &xmml, 1) )
            return -EFAULT;

        break;

    default:
        rc = -ENOSYS;
        break;
    }

    return rc;
}
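
/*
 * Install the guest kernel stack (ss:esp) used on ring transitions: record
 * it in the vcpu's guest context and in the ring-1 fields of this CPU's TSS.
 */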
long do_stack_switch(unsigned long ss, unsigned long esp)
{
    int nr = smp_processor_id();
    struct tss_struct *t = &init_tss[nr];

    fixup_guest_stack_selector(current->domain, ss);

    current->arch.guest_context.kernel_ss = ss;
    current->arch.guest_context.kernel_sp = esp;
    t->ss1 = ss;
    t->esp1 = esp;

    return 0;
}

/* Returns TRUE if given descriptor is valid for GDT or LDT. */
int check_descriptor(const struct domain *dom, struct desc_struct *d)
{
    unsigned long base, limit;
    u32 a = d->a, b = d->b;
    u16 cs;

    /* Let a ring0 guest kernel set any descriptor it wants to. */
    if ( supervisor_mode_kernel )
        return 1;

    /* A not-present descriptor will always fault, so is safe. */
    if ( !(b & _SEGMENT_P) )
        goto good;

    /*
     * We don't allow a DPL of zero. There is no legitimate reason for
     * specifying DPL==0, and it gets rather dangerous if we also accept call
     * gates (consider a call gate pointing at another kernel descriptor with
     * DPL 0 -- this would get the OS ring-0 privileges).
     */
    if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL(dom) << 13) )
        d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL(dom) << 13);

    if ( !(b & _SEGMENT_S) )
    {
        /*
         * System segment:
         *  1. Don't allow interrupt or trap gates as they belong in the IDT.
         *  2. Don't allow TSS descriptors or task gates as we don't
         *     virtualise x86 tasks.
         *  3. Don't allow LDT descriptors because they're unnecessary and
         *     I'm uneasy about allowing an LDT page to contain LDT
         *     descriptors. In any case, Xen automatically creates the
         *     required descriptor when reloading the LDT register.
         *  4. We allow call gates but they must not jump to a private segment.
         */

        /* Disallow everything but call gates. */
        if ( (b & _SEGMENT_TYPE) != 0xc00 )
            goto bad;

        /* Validate and fix up the target code selector. */
        cs = a >> 16;
        fixup_guest_code_selector(dom, cs);
        if ( !guest_gate_selector_okay(dom, cs) )
            goto bad;
        a = d->a = (d->a & 0xffffU) | (cs << 16);

        /* Reserved bits must be zero. */
        if ( (b & 0xe0) != 0 )
            goto bad;

        /* No base/limit check is needed for a call gate. */
        goto good;
    }

    /* Check that base is at least a page away from Xen-private area. */
    base = (b&(0xff<<24)) | ((b&0xff)<<16) | (a>>16);
    if ( base >= (GUEST_SEGMENT_MAX_ADDR - PAGE_SIZE) )
        goto bad;

    /* Check and truncate the limit if necessary. */
    limit = (b&0xf0000) | (a&0xffff);
    limit++; /* We add one because limit is inclusive. */
    if ( (b & _SEGMENT_G) )
        limit <<= 12;

    if ( (b & (_SEGMENT_CODE | _SEGMENT_EC)) == _SEGMENT_EC )
    {
        /*
         * DATA, GROWS-DOWN.
         * Grows-down limit check.
         * NB. limit == 0xFFFFF provides no access      (if G=1).
         *     limit == 0x00000 provides 4GB-4kB access (if G=1).
         */
        if ( (base + limit) > base )
        {
            limit = -(base & PAGE_MASK);
            goto truncate;
        }
    }
    else
    {
        /*
         * DATA, GROWS-UP.
         * CODE (CONFORMING AND NON-CONFORMING).
         * Grows-up limit check.
         * NB. limit == 0xFFFFF provides 4GB access (if G=1).
         *     limit == 0x00000 provides 4kB access (if G=1).
         */
        if ( ((base + limit) <= base) ||
             ((base + limit) > GUEST_SEGMENT_MAX_ADDR) )
        {
            limit = GUEST_SEGMENT_MAX_ADDR - base;
        truncate:
            if ( !(b & _SEGMENT_G) )
                goto bad; /* too dangerous; too hard to work out... */
            limit = (limit >> 12) - 1;
            d->a &= ~0x0ffff; d->a |= limit & 0x0ffff;
            d->b &= ~0xf0000; d->b |= limit & 0xf0000;
        }
    }

 good:
    return 1;
 bad:
    return 0;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */