direct-io.hg

view xen/arch/x86/x86_64/mm.c @ 4479:78c506cbdb5f

BitKeeper revision 1.1274 (4252b481LlZ-TCB7yUWh6cpv21lxZQ)

Fix canonicalising linear base address in set_segment_base hypercall.
Signed-off-by: Keir Fraser <keir@xensource.com>

author:   kaf24@firebug.cl.cam.ac.uk
date:     Tue Apr 05 15:53:37 2005 +0000 (2005-04-05)
parents:  445b12a7221a
children: d946608e6ddc
line source
1 /******************************************************************************
2 * arch/x86/x86_64/mm.c
3 *
4 * Modifications to Linux original are copyright (c) 2004, K A Fraser
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
21 #include <xen/config.h>
22 #include <xen/lib.h>
23 #include <xen/init.h>
24 #include <xen/mm.h>
25 #include <xen/sched.h>
26 #include <asm/page.h>
27 #include <asm/flushtlb.h>
28 #include <asm/fixmap.h>
29 #include <asm/msr.h>
31 static void *safe_page_alloc(void)
32 {
33 extern int early_boot;
34 if ( early_boot )
35 {
36 unsigned long p = alloc_boot_pages(PAGE_SIZE, PAGE_SIZE);
37 if ( p == 0 )
38 goto oom;
39 return phys_to_virt(p);
40 }
41 else
42 {
43 struct pfn_info *pg = alloc_domheap_page(NULL);
44 if ( pg == NULL )
45 goto oom;
46 return page_to_virt(pg);
47 }
48 oom:
49 panic("Out of memory");
50 return NULL;
51 }
53 /* Map physical byte range (@p, @p+@s) at virt address @v in pagetable @pt. */
54 #define __PTE_MASK (~(_PAGE_GLOBAL|_PAGE_DIRTY|_PAGE_PCD|_PAGE_PWT))
55 int map_pages(
56 root_pgentry_t *pt,
57 unsigned long v,
58 unsigned long p,
59 unsigned long s,
60 unsigned long flags)
61 {
62 l4_pgentry_t *pl4e;
63 l3_pgentry_t *pl3e;
64 l2_pgentry_t *pl2e;
65 l1_pgentry_t *pl1e;
66 void *newpg;
68 while ( s != 0 )
69 {
70 pl4e = &pt[l4_table_offset(v)];
71 if ( !(l4_pgentry_val(*pl4e) & _PAGE_PRESENT) )
72 {
73 newpg = safe_page_alloc();
74 clear_page(newpg);
75 *pl4e = mk_l4_pgentry(__pa(newpg) | (flags & __PTE_MASK));
76 }
78 pl3e = l4_pgentry_to_l3(*pl4e) + l3_table_offset(v);
79 if ( !(l3_pgentry_val(*pl3e) & _PAGE_PRESENT) )
80 {
81 newpg = safe_page_alloc();
82 clear_page(newpg);
83 *pl3e = mk_l3_pgentry(__pa(newpg) | (flags & __PTE_MASK));
84 }
86 pl2e = l3_pgentry_to_l2(*pl3e) + l2_table_offset(v);
88 if ( ((s|v|p) & ((1<<L2_PAGETABLE_SHIFT)-1)) == 0 )
89 {
90 /* Super-page mapping. */
91 if ( (l2_pgentry_val(*pl2e) & _PAGE_PRESENT) )
92 local_flush_tlb_pge();
93 *pl2e = mk_l2_pgentry(p|flags|_PAGE_PSE);
95 v += 1 << L2_PAGETABLE_SHIFT;
96 p += 1 << L2_PAGETABLE_SHIFT;
97 s -= 1 << L2_PAGETABLE_SHIFT;
98 }
99 else
100 {
101 /* Normal page mapping. */
102 if ( !(l2_pgentry_val(*pl2e) & _PAGE_PRESENT) )
103 {
104 newpg = safe_page_alloc();
105 clear_page(newpg);
106 *pl2e = mk_l2_pgentry(__pa(newpg) | (flags & __PTE_MASK));
107 }
108 pl1e = l2_pgentry_to_l1(*pl2e) + l1_table_offset(v);
109 if ( (l1_pgentry_val(*pl1e) & _PAGE_PRESENT) )
110 local_flush_tlb_one(v);
111 *pl1e = mk_l1_pgentry(p|flags);
113 v += 1 << L1_PAGETABLE_SHIFT;
114 p += 1 << L1_PAGETABLE_SHIFT;
115 s -= 1 << L1_PAGETABLE_SHIFT;
116 }
117 }
119 return 0;
120 }
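
A minimal usage sketch (not part of the file; the ioremap address and flag combinations are assumptions): map_pages() takes the superpage path only when v, p and the remaining size s are all 2MB-aligned, otherwise it installs 4KB entries.

/* Hypothetical: map 16KB of MMIO.  0xfee00000 is not 2MB-aligned, so the
 * 4KB-page path is taken for all four pages. */
map_pages(idle_pg_table, IOREMAP_VIRT_START, 0xfee00000UL, 0x4000UL,
          PAGE_HYPERVISOR | _PAGE_PCD);

/* Hypothetical: 64MB with 2MB alignment on v, p and s, so the loop installs
 * 32 _PAGE_PSE superpage entries instead. */
map_pages(idle_pg_table, PAGE_OFFSET + (64UL << 20), 64UL << 20,
          64UL << 20, PAGE_HYPERVISOR);
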
122 void __set_fixmap(
123 enum fixed_addresses idx, unsigned long p, unsigned long flags)
124 {
125 if ( unlikely(idx >= __end_of_fixed_addresses) )
126 BUG();
127 map_pages(idle_pg_table, fix_to_virt(idx), p, PAGE_SIZE, flags);
128 }
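
A sketch of a caller (FIX_APIC_BASE and the uncached flag combination are assumptions about the surrounding headers, not taken from this file):

/* Hypothetical: map the local APIC registers uncached at their fixmap slot;
 * they are then reachable at the constant address fix_to_virt(FIX_APIC_BASE). */
__set_fixmap(FIX_APIC_BASE, 0xfee00000UL, __PAGE_HYPERVISOR | _PAGE_PCD);
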
131 void __init paging_init(void)
132 {
133 unsigned long i, p, max;
134 l3_pgentry_t *l3rw, *l3ro;
135 struct pfn_info *pg;
137 /* Map all of physical memory. */
138 max = ((max_page + L1_PAGETABLE_ENTRIES - 1) &
139 ~(L1_PAGETABLE_ENTRIES - 1)) << PAGE_SHIFT;
140 map_pages(idle_pg_table, PAGE_OFFSET, 0, max, PAGE_HYPERVISOR);
142 /*
143 * Allocate and map the machine-to-phys table.
144 * This also ensures L3 is present for ioremap().
145 */
146 for ( i = 0; i < max_page; i += ((1UL << L2_PAGETABLE_SHIFT) / 8) )
147 {
148 pg = alloc_domheap_pages(
149 NULL, L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT);
150 if ( pg == NULL )
151 panic("Not enough memory for m2p table\n");
152 p = page_to_phys(pg);
153 map_pages(idle_pg_table, RDWR_MPT_VIRT_START + i*8, p,
154 1UL << L2_PAGETABLE_SHIFT, PAGE_HYPERVISOR | _PAGE_USER);
155 memset((void *)(RDWR_MPT_VIRT_START + i*8), 0x55,
156 1UL << L2_PAGETABLE_SHIFT);
157 }
159 /*
160 * Above we mapped the M2P table as user-accessible and read-writable.
161 * Fix security by denying user access at the top level of the page table.
162 */
163 idle_pg_table[l4_table_offset(RDWR_MPT_VIRT_START)] =
164 mk_l4_pgentry(l4_pgentry_val(
165 idle_pg_table[l4_table_offset(RDWR_MPT_VIRT_START)]) &
166 ~_PAGE_USER);
168 /* Create read-only mapping of MPT for guest-OS use. */
169 l3ro = (l3_pgentry_t *)alloc_xenheap_page();
170 clear_page(l3ro);
171 idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)] =
172 mk_l4_pgentry((__pa(l3ro) | __PAGE_HYPERVISOR | _PAGE_USER) &
173 ~_PAGE_RW);
174 /* Copy the L3 mappings from the RDWR_MPT area. */
175 l3rw = l4_pgentry_to_l3(
176 idle_pg_table[l4_table_offset(RDWR_MPT_VIRT_START)]);
177 l3rw += l3_table_offset(RDWR_MPT_VIRT_START);
178 l3ro += l3_table_offset(RO_MPT_VIRT_START);
179 memcpy(l3ro, l3rw,
180 (RDWR_MPT_VIRT_END - RDWR_MPT_VIRT_START) >> L3_PAGETABLE_SHIFT);
182 /* Set up linear page table mapping. */
183 idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)] =
184 mk_l4_pgentry(__pa(idle_pg_table) | __PAGE_HYPERVISOR);
185 }
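
An illustrative sizing note for the machine-to-phys (M2P) loop above, assuming 8-byte entries as the i*8 scaling implies (the figures are a worked example, not taken from the file):

/*
 * Worked example: with 4GB of RAM, max_page = 1,048,576 frames, so the M2P
 * table needs 1,048,576 * 8 bytes = 8MB.  The loop advances i by
 * (2MB / 8) = 262,144 frames per pass, i.e. one 2MB chunk of table, so it
 * runs 4 times, each time allocating 2^(L2_PAGETABLE_SHIFT -
 * L1_PAGETABLE_SHIFT) = 512 contiguous domheap pages and mapping them as a
 * single superpage-sized region.
 */
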
187 void __init zap_low_mappings(void)
188 {
189 idle_pg_table[0] = mk_l4_pgentry(0);
190 flush_tlb_all_pge();
191 }
193 void subarch_init_memory(struct domain *dom_xen)
194 {
195 unsigned long i, v, m2p_start_mfn;
196 l3_pgentry_t l3e;
197 l2_pgentry_t l2e;
199 /*
200 * We are rather picky about the layout of 'struct pfn_info'. The
201 * count_info and domain fields must be adjacent, as we perform atomic
202 * 64-bit operations on them.
203 */
204 if ( (offsetof(struct pfn_info, u.inuse._domain) !=
205 (offsetof(struct pfn_info, count_info) + sizeof(u32))) )
206 {
207 printk("Weird pfn_info layout (%ld,%ld,%ld)\n",
208 offsetof(struct pfn_info, count_info),
209 offsetof(struct pfn_info, u.inuse._domain),
210 sizeof(struct pfn_info));
211 for ( ; ; ) ;
212 }
214 /* M2P table is mappable read-only by privileged domains. */
215 for ( v = RDWR_MPT_VIRT_START;
216 v != RDWR_MPT_VIRT_END;
217 v += 1 << L2_PAGETABLE_SHIFT )
218 {
219 l3e = l4_pgentry_to_l3(idle_pg_table[l4_table_offset(v)])[
220 l3_table_offset(v)];
221 if ( !(l3_pgentry_val(l3e) & _PAGE_PRESENT) )
222 continue;
223 l2e = l3_pgentry_to_l2(l3e)[l2_table_offset(v)];
224 if ( !(l2_pgentry_val(l2e) & _PAGE_PRESENT) )
225 continue;
226 m2p_start_mfn = l2_pgentry_to_pfn(l2e);
228 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
229 {
230 frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1;
231 /* Use the GDT page type so that non-privileged domains can only
232 map these frames read-only. */
233 frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1;
234 page_set_owner(&frame_table[m2p_start_mfn+i], dom_xen);
235 }
236 }
237 }
239 long do_stack_switch(unsigned long ss, unsigned long esp)
240 {
241 if ( (ss & 3) != 3 )
242 return -EPERM;
243 current->arch.kernel_ss = ss;
244 current->arch.kernel_sp = esp;
245 return 0;
246 }
248 long do_set_segment_base(unsigned int which, unsigned long base)
249 {
250 struct exec_domain *ed = current;
252 /* Canonicalise the base address. */
253 base |= ~VADDR_MASK;
255 switch ( which )
256 {
257 case SEGBASE_FS:
258 ed->arch.user_ctxt.fs_base = base;
259 wrmsr(MSR_FS_BASE, base, base>>32);
260 break;
262 case SEGBASE_GS_USER:
263 ed->arch.user_ctxt.gs_base_user = base;
264 wrmsr(MSR_SHADOW_GS_BASE, base, base>>32);
265 break;
267 case SEGBASE_GS_KERNEL:
268 ed->arch.user_ctxt.gs_base_kernel = base;
269 wrmsr(MSR_GS_BASE, base, base>>32);
270 break;
272 case SEGBASE_GS_USER_SEL:
273 __asm__ __volatile__ (
274 " swapgs \n"
275 "1: movl %k0,%%gs \n"
276 " mfence; swapgs \n" /* AMD erratum #88 */
277 ".section .fixup,\"ax\" \n"
278 "2: xorl %k0,%k0 \n"
279 " jmp 1b \n"
280 ".previous \n"
281 ".section __ex_table,\"a\"\n"
282 " .align 8 \n"
283 " .quad 1b,2b \n"
284 ".previous "
285 : : "r" (base&0xffff) );
286 break;
288 default:
289 return -EINVAL;
290 }
292 return 0;
293 }
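
Two illustrative notes on the hypercall above. The canonicalisation sketch states the general x86-64 rule (bits 63..47 of a linear address must all be equal); the exact policy encoded by VADDR_MASK is defined elsewhere and is not restated here.

/* A common sign-extension idiom for canonicalising a 48-bit address
 * (illustrative only, not the macro used above): */
static inline unsigned long canonicalise_48(unsigned long addr)
{
    /* Replicate bit 47 into bits 63..48. */
    return (unsigned long)((long)(addr << 16) >> 16);
}

/* The wrmsr(msr, lo, hi) calls split the 64-bit base into EAX/EDX halves:
 * e.g. base = 0xffff880000010000 is written as lo = 0x00010000,
 * hi = 0xffff8800. */
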
296 /* Returns TRUE if given descriptor is valid for GDT or LDT. */
297 int check_descriptor(struct desc_struct *d)
298 {
299 u32 a = d->a, b = d->b;
301 /* A not-present descriptor will always fault, so is safe. */
302 if ( !(b & _SEGMENT_P) )
303 goto good;
305 /* The guest can only safely be executed in ring 3. */
306 if ( (b & _SEGMENT_DPL) != _SEGMENT_DPL )
307 goto bad;
309 /* All code and data segments are okay. No base/limit checking. */
310 if ( (b & _SEGMENT_S) )
311 goto good;
313 /* Invalid type 0 is harmless. It is used for 2nd half of a call gate. */
314 if ( (b & _SEGMENT_TYPE) == 0x000 )
315 goto good;
317 /* Everything but a call gate is discarded here. */
318 if ( (b & _SEGMENT_TYPE) != 0xc00 )
319 goto bad;
321 /* Can't allow far jump to a Xen-private segment. */
322 if ( !VALID_CODESEL(a>>16) )
323 goto bad;
325 /* Reserved bits must be zero. */
326 if ( (b & 0xe0) != 0 )
327 goto bad;
329 good:
330 return 1;
331 bad:
332 return 0;
333 }
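
A worked example of the checks above (the descriptor encoding follows the Intel/AMD manuals; the _SEGMENT_* masks are assumed to sit at the usual bit positions in the high word):

/*
 * Illustrative only: a flat ring-3 code segment with base 0, limit 0xfffff,
 * G=1, L=1, P=1, DPL=3, type 0xA (execute/read) encodes as
 *
 *   d->a = 0x0000ffff   (limit 15:0, base 15:0)
 *   d->b = 0x00affa00   (base 23:16, type/S/DPL/P, limit 19:16, flags, base 31:24)
 *
 * check_descriptor() sees _SEGMENT_P set, a DPL field equal to 3 and
 * _SEGMENT_S set (code/data segment), so it exits at "good" and accepts
 * the descriptor.
 */
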
336 #ifdef MEMORY_GUARD
338 #define ALLOC_PT(_level) \
339 do { \
340 (_level) = (_level ## _pgentry_t *)heap_start; \
341 heap_start = (void *)((unsigned long)heap_start + PAGE_SIZE); \
342 clear_page(_level); \
343 } while ( 0 )
344 void *memguard_init(void *heap_start)
345 {
346 l1_pgentry_t *l1 = NULL;
347 l2_pgentry_t *l2 = NULL;
348 l3_pgentry_t *l3 = NULL;
349 l4_pgentry_t *l4 = &idle_pg_table[l4_table_offset(PAGE_OFFSET)];
350 unsigned long i, j;
352 /* Round the allocation pointer up to a page boundary. */
353 heap_start = (void *)(((unsigned long)heap_start + (PAGE_SIZE-1)) &
354 PAGE_MASK);
356 /* Memory guarding is incompatible with super pages. */
357 for ( i = 0; i < (xenheap_phys_end >> L2_PAGETABLE_SHIFT); i++ )
358 {
359 ALLOC_PT(l1);
360 for ( j = 0; j < L1_PAGETABLE_ENTRIES; j++ )
361 l1[j] = mk_l1_pgentry((i << L2_PAGETABLE_SHIFT) |
362 (j << L1_PAGETABLE_SHIFT) |
363 __PAGE_HYPERVISOR);
364 if ( !((unsigned long)l2 & (PAGE_SIZE-1)) )
365 {
366 ALLOC_PT(l2);
367 if ( !((unsigned long)l3 & (PAGE_SIZE-1)) )
368 {
369 ALLOC_PT(l3);
370 *l4++ = mk_l4_pgentry(virt_to_phys(l3) | __PAGE_HYPERVISOR);
371 }
372 *l3++ = mk_l3_pgentry(virt_to_phys(l2) | __PAGE_HYPERVISOR);
373 }
374 *l2++ = mk_l2_pgentry(virt_to_phys(l1) | __PAGE_HYPERVISOR);
375 }
377 return heap_start;
378 }
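
An illustrative cost note (the heap size is a made-up figure): memguard_init() rebuilds the xenheap mappings at 4KB granularity so that individual pages can later have _PAGE_PRESENT cleared.

/*
 * Worked example: with xenheap_phys_end = 16MB the loop runs 16MB / 2MB = 8
 * times, consuming one L1 table per 2MB of xenheap plus one L2 and one L3
 * table on the first pass: 10 pages taken from heap_start, which is why the
 * adjusted heap_start is returned to the caller.
 */
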
380 static void __memguard_change_range(void *p, unsigned long l, int guard)
381 {
382 l1_pgentry_t *l1;
383 l2_pgentry_t *l2;
384 l3_pgentry_t *l3;
385 l4_pgentry_t *l4;
386 unsigned long _p = (unsigned long)p;
387 unsigned long _l = (unsigned long)l;
389 /* Ensure we are dealing with a page-aligned whole number of pages. */
390 ASSERT((_p&PAGE_MASK) != 0);
391 ASSERT((_l&PAGE_MASK) != 0);
392 ASSERT((_p&~PAGE_MASK) == 0);
393 ASSERT((_l&~PAGE_MASK) == 0);
395 while ( _l != 0 )
396 {
397 l4 = &idle_pg_table[l4_table_offset(_p)];
398 l3 = l4_pgentry_to_l3(*l4) + l3_table_offset(_p);
399 l2 = l3_pgentry_to_l2(*l3) + l2_table_offset(_p);
400 l1 = l2_pgentry_to_l1(*l2) + l1_table_offset(_p);
401 if ( guard )
402 *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) & ~_PAGE_PRESENT);
403 else
404 *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) | _PAGE_PRESENT);
405 _p += PAGE_SIZE;
406 _l -= PAGE_SIZE;
407 }
408 }
410 void memguard_guard_stack(void *p)
411 {
412 p = (void *)((unsigned long)p + PAGE_SIZE);
413 memguard_guard_range(p, 2 * PAGE_SIZE);
414 }
416 void memguard_guard_range(void *p, unsigned long l)
417 {
418 __memguard_change_range(p, l, 1);
419 local_flush_tlb();
420 }
422 void memguard_unguard_range(void *p, unsigned long l)
423 {
424 __memguard_change_range(p, l, 0);
425 }
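
A usage sketch (the buffer is hypothetical and assumed to live in a region that memguard_init() has already converted to 4KB mappings):

/* Hypothetical: fence off a page-aligned scratch area while it must not be
 * touched, then restore normal access. */
static char scratch[2*PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));

static void guard_scratch_example(void)
{
    memguard_guard_range(scratch, sizeof(scratch));   /* accesses now fault */
    /* ... */
    memguard_unguard_range(scratch, sizeof(scratch)); /* accesses OK again  */
}
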
427 #endif
429 /*
430 * Local variables:
431 * mode: C
432 * c-set-style: "BSD"
433 * c-basic-offset: 4
434 * tab-width: 4
435 * indent-tabs-mode: nil
436 * End:
437 */