ia64/xen-unstable: xen/arch/x86/x86_32/mm.c @ 6042:69b7c9c3a9fd

Fix mach2phys table allocation for 32-bit PAE Xen.
Found by Gerd Knorr.

author    kaf24@firebug.cl.cam.ac.uk
date      Fri Aug 05 15:11:46 2005 +0000 (2005-08-05)
parents   f294acb25858
children  5ec5f6960507 40b887fa79d0

/******************************************************************************
 * arch/x86/x86_32/mm.c
 *
 * Modifications to Linux original are copyright (c) 2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <xen/config.h>
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/sched.h>
#include <asm/current.h>
#include <asm/page.h>
#include <asm/flushtlb.h>
#include <asm/fixmap.h>

extern l1_pgentry_t *mapcache;

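/*
 * Effective mapping attributes for Xen mappings. These start out as the
 * architectural defaults and have _PAGE_GLOBAL OR'ed in by paging_init()
 * when the CPU supports PGE.
 */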
unsigned int PAGE_HYPERVISOR         = __PAGE_HYPERVISOR;
unsigned int PAGE_HYPERVISOR_NOCACHE = __PAGE_HYPERVISOR_NOCACHE;

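/*
 * Total size, in bytes, of the machine-to-phys (M2P) table mapping;
 * computed in paging_init() and consumed again by subarch_init_memory().
 */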
static unsigned long mpt_size;

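/*
 * Allocate a page for use as a Xen page table. While still in early boot
 * (before the xenheap is initialised) pages are taken directly from the
 * start of the Xen heap; afterwards the normal xenheap allocator is used.
 */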
struct pfn_info *alloc_xen_pagetable(void)
{
    extern int early_boot;
    extern unsigned long xenheap_phys_start;
    struct pfn_info *pg;

    if ( !early_boot )
    {
        void *v = alloc_xenheap_page();
        return ((v == NULL) ? NULL : virt_to_page(v));
    }

    pg = phys_to_page(xenheap_phys_start);
    xenheap_phys_start += PAGE_SIZE;
    return pg;
}

void free_xen_pagetable(struct pfn_info *pg)
{
    free_xenheap_page(page_to_virt(pg));
}

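/* Return the idle page table's L2 entry that covers virtual address 'v'. */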
l2_pgentry_t *virt_to_xen_l2e(unsigned long v)
{
    return &idle_pg_table_l2[l2_linear_offset(v)];
}

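/*
 * Boot-time paging setup for x86_32: point idle0's monitor table at the idle
 * page table, enable global pages if available, allocate and map the
 * machine-to-phys table, and create the ioremap() and domain-page mapcache
 * page tables.
 */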
void __init paging_init(void)
{
    void *ioremap_pt;
    unsigned long v;
    struct pfn_info *pg;
    int i, mapcache_order;

#ifdef CONFIG_X86_PAE
    printk("PAE enabled, limit: %d GB\n", MACHPHYS_MBYTES);
#else
    printk("PAE disabled.\n");
#endif

    idle0_vcpu.arch.monitor_table = mk_pagetable(__pa(idle_pg_table));

    if ( cpu_has_pge )
    {
        /* Suitable Xen mapping can be GLOBAL. */
        set_in_cr4(X86_CR4_PGE);
        PAGE_HYPERVISOR |= _PAGE_GLOBAL;
        PAGE_HYPERVISOR_NOCACHE |= _PAGE_GLOBAL;
        /* Transform early mappings (e.g., the frametable). */
        for ( v = HYPERVISOR_VIRT_START; v; v += (1 << L2_PAGETABLE_SHIFT) )
            if ( (l2e_get_flags(idle_pg_table_l2[l2_linear_offset(v)]) &
                  (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT) )
                l2e_add_flags(idle_pg_table_l2[l2_linear_offset(v)],
                              _PAGE_GLOBAL);
    }

    /*
     * Allocate and map the machine-to-phys table and create read-only mapping
     * of MPT for guest-OS use. Without PAE we'll end up with one 4MB page,
     * with PAE we'll allocate 2MB pages depending on the amount of memory
     * installed, but at least 4MB to cover 4GB address space. This is needed
     * to make PCI I/O memory address lookups work in guests.
     */
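    /*
     * Illustrative example (not from the original source): with 512MB of RAM,
     * max_page = 0x20000 frames, so the M2P needs max_page * 4 = 512kB; this
     * rounds up to a single superpage and is then raised to the 4MB minimum
     * below.
     */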
    mpt_size  = (max_page * 4) + (1UL << L2_PAGETABLE_SHIFT) - 1UL;
    mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
    if ( mpt_size < (4 << 20) )
        mpt_size = 4 << 20;
    for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
    {
        if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL )
            panic("Not enough memory to bootstrap Xen.\n");
        idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i] =
            l2e_from_page(pg, PAGE_HYPERVISOR | _PAGE_PSE);
        idle_pg_table_l2[l2_linear_offset(RO_MPT_VIRT_START) + i] =
            l2e_from_page(pg, (__PAGE_HYPERVISOR | _PAGE_PSE) & ~_PAGE_RW);
    }
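    /* Fill the freshly mapped table with a recognisable pattern; real M2P
       entries are written later as pages are assigned (0x55 is assumed to be
       just a filler value). */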
    memset((void *)RDWR_MPT_VIRT_START, 0x55, mpt_size);

    /* Create page tables for ioremap(). */
    for ( i = 0; i < (IOREMAP_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
    {
        ioremap_pt = alloc_xenheap_page();
        clear_page(ioremap_pt);
        idle_pg_table_l2[l2_linear_offset(IOREMAP_VIRT_START) + i] =
            l2e_from_page(virt_to_page(ioremap_pt), __PAGE_HYPERVISOR);
    }

    /* Set up mapping cache for domain pages. */
    mapcache_order = get_order(MAPCACHE_MBYTES << (20 - PAGETABLE_ORDER));
    mapcache = alloc_xenheap_pages(mapcache_order);
    memset(mapcache, 0, PAGE_SIZE << mapcache_order);
    for ( i = 0; i < (MAPCACHE_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
        idle_pg_table_l2[l2_linear_offset(MAPCACHE_VIRT_START) + i] =
            l2e_from_page(virt_to_page(mapcache) + i, __PAGE_HYPERVISOR);
}

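/*
 * Remove the boot-time identity (virt == phys) superpage mappings below
 * HYPERVISOR_VIRT_START from the given L2 table, then flush global TLB
 * entries.
 */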
void __init zap_low_mappings(l2_pgentry_t *base)
{
    int i;
    u32 addr;

    for (i = 0; ; i++) {
        addr = (i << L2_PAGETABLE_SHIFT);
        if (addr >= HYPERVISOR_VIRT_START)
            break;
        if (l2e_get_paddr(base[i]) != addr)
            continue;
        base[i] = l2e_empty();
    }
    flush_tlb_all_pge();
}

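/*
 * x86_32-specific memory subsystem initialisation: sanity-check the
 * 'struct pfn_info' layout and hand the M2P frames to dom_xen so that
 * guests may map them read-only.
 */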
void subarch_init_memory(struct domain *dom_xen)
{
    unsigned long m2p_start_mfn;
    int i, j;

    /*
     * We are rather picky about the layout of 'struct pfn_info'. The
     * count_info and domain fields must be adjacent, as we perform atomic
     * 64-bit operations on them. Also, just for sanity, we assert the size
     * of the structure here.
     */
    if ( (offsetof(struct pfn_info, u.inuse._domain) !=
          (offsetof(struct pfn_info, count_info) + sizeof(u32))) ||
         (sizeof(struct pfn_info) != 24) )
    {
        printk("Weird pfn_info layout (%ld,%ld,%d)\n",
               offsetof(struct pfn_info, count_info),
               offsetof(struct pfn_info, u.inuse._domain),
               sizeof(struct pfn_info));
        BUG();
    }

    /* M2P table is mappable read-only by privileged domains. */
    for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
    {
        m2p_start_mfn = l2e_get_pfn(
            idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i]);
        /* Use a separate index here: reusing 'i' for the inner loop would
           clobber the outer superpage counter. */
        for ( j = 0; j < L2_PAGETABLE_ENTRIES; j++ )
        {
            frame_table[m2p_start_mfn+j].count_info = PGC_allocated | 1;
            /* Ensure it's only mapped read-only by domains. */
            frame_table[m2p_start_mfn+j].u.inuse.type_info = PGT_gdt_page | 1;
            page_set_owner(&frame_table[m2p_start_mfn+j], dom_xen);
        }
    }
}

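/*
 * Hypercall: record the guest kernel's ring-1 stack selector/pointer, both
 * in the vcpu state and in this CPU's TSS used for ring transitions.
 */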
long do_stack_switch(unsigned long ss, unsigned long esp)
{
    int nr = smp_processor_id();
    struct tss_struct *t = &init_tss[nr];

    if ( (ss & 3) != 1 )
        return -EPERM;

    current->arch.guest_context.kernel_ss = ss;
    current->arch.guest_context.kernel_sp = esp;
    t->ss1  = ss;
    t->esp1 = esp;

    return 0;
}

/* Returns TRUE if given descriptor is valid for GDT or LDT. */
int check_descriptor(struct desc_struct *d)
{
    unsigned long base, limit;
    u32 a = d->a, b = d->b;

    /* A not-present descriptor will always fault, so is safe. */
    if ( !(b & _SEGMENT_P) )
        goto good;

    /*
     * We don't allow a DPL of zero. There is no legitimate reason for
     * specifying DPL==0, and it gets rather dangerous if we also accept call
     * gates (consider a call gate pointing at another kernel descriptor with
     * DPL 0 -- this would get the OS ring-0 privileges).
     */
    if ( (b & _SEGMENT_DPL) == 0 )
        goto bad;

    if ( !(b & _SEGMENT_S) )
    {
        /*
         * System segment:
         *  1. Don't allow interrupt or trap gates as they belong in the IDT.
         *  2. Don't allow TSS descriptors or task gates as we don't
         *     virtualise x86 tasks.
         *  3. Don't allow LDT descriptors because they're unnecessary and
         *     I'm uneasy about allowing an LDT page to contain LDT
         *     descriptors. In any case, Xen automatically creates the
         *     required descriptor when reloading the LDT register.
         *  4. We allow call gates but they must not jump to a private segment.
         */

        /* Disallow everything but call gates. */
        if ( (b & _SEGMENT_TYPE) != 0xc00 )
            goto bad;

        /* Can't allow far jump to a Xen-private segment. */
        if ( !VALID_CODESEL(a>>16) )
            goto bad;

        /* Reserved bits must be zero. */
        if ( (b & 0xe0) != 0 )
            goto bad;

        /* No base/limit check is needed for a call gate. */
        goto good;
    }

    /* Check that base is at least a page away from Xen-private area. */
    base = (b&(0xff<<24)) | ((b&0xff)<<16) | (a>>16);
    if ( base >= (GUEST_SEGMENT_MAX_ADDR - PAGE_SIZE) )
        goto bad;

    /* Check and truncate the limit if necessary. */
    limit = (b&0xf0000) | (a&0xffff);
    limit++; /* We add one because limit is inclusive. */
    if ( (b & _SEGMENT_G) )
        limit <<= 12;

    if ( (b & (_SEGMENT_CODE | _SEGMENT_EC)) == _SEGMENT_EC )
    {
        /*
         * DATA, GROWS-DOWN.
         * Grows-down limit check.
         * NB. limit == 0xFFFFF provides no access      (if G=1).
         *     limit == 0x00000 provides 4GB-4kB access (if G=1).
         */
        if ( (base + limit) > base )
        {
            limit = -(base & PAGE_MASK);
            goto truncate;
        }
    }
    else
    {
        /*
         * DATA, GROWS-UP.
         * CODE (CONFORMING AND NON-CONFORMING).
         * Grows-up limit check.
         * NB. limit == 0xFFFFF provides 4GB access (if G=1).
         *     limit == 0x00000 provides 4kB access (if G=1).
         */
        if ( ((base + limit) <= base) ||
             ((base + limit) > GUEST_SEGMENT_MAX_ADDR) )
        {
            limit = GUEST_SEGMENT_MAX_ADDR - base;
        truncate:
            if ( !(b & _SEGMENT_G) )
                goto bad; /* too dangerous; too hard to work out... */
            limit = (limit >> 12) - 1;
            d->a &= ~0x0ffff; d->a |= limit & 0x0ffff;
            d->b &= ~0xf0000; d->b |= limit & 0xf0000;
        }
    }

 good:
    return 1;
 bad:
    return 0;
}

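/* Guard a page-sized region at 'p', presumably the base of a Xen stack, so
   that stray accesses fault (assumes memguard_guard_range() provides the
   underlying protection). */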
void memguard_guard_stack(void *p)
{
    memguard_guard_range(p, PAGE_SIZE);
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */