ia64/xen-unstable: xen/arch/x86/x86_64/mm.c @ 6552:a9873d384da4

Merge.

author   adsharma@los-vmm.sc.intel.com
date     Thu Aug 25 12:24:48 2005 -0700
parents  112d44270733 fa0754a9f64f
children dfaf788ab18c

/******************************************************************************
 * arch/x86/x86_64/mm.c
 *
 * Modifications to Linux original are copyright (c) 2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc., 59
 * Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/config.h>
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/sched.h>
#include <asm/current.h>
#include <asm/asm_defns.h>
#include <asm/page.h>
#include <asm/flushtlb.h>
#include <asm/fixmap.h>
#include <asm/msr.h>

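/*
 * Allocate a page for Xen's own page tables. During early boot the domain
 * heap is not yet available, so pages are taken from the boot allocator
 * instead.
 */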
struct pfn_info *alloc_xen_pagetable(void)
{
    extern int early_boot;
    unsigned long pfn;

    if ( !early_boot )
        return alloc_domheap_page(NULL);

    pfn = alloc_boot_pages(1, 1);
    return ((pfn == 0) ? NULL : pfn_to_page(pfn));
}

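/* Return a Xen page-table page to the domain heap. */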
void free_xen_pagetable(struct pfn_info *pg)
{
    free_domheap_page(pg);
}

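/*
 * Return a pointer to the L2 entry covering virtual address 'v' in the idle
 * (Xen) page tables, allocating and installing any missing intermediate
 * L3/L2 tables along the way.
 */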
l2_pgentry_t *virt_to_xen_l2e(unsigned long v)
{
    l4_pgentry_t *pl4e;
    l3_pgentry_t *pl3e;
    l2_pgentry_t *pl2e;

    pl4e = &idle_pg_table[l4_table_offset(v)];
    if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) )
    {
        pl3e = page_to_virt(alloc_xen_pagetable());
        clear_page(pl3e);
        *pl4e = l4e_from_paddr(__pa(pl3e), __PAGE_HYPERVISOR);
    }

    pl3e = l4e_to_l3e(*pl4e) + l3_table_offset(v);
    if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
    {
        pl2e = page_to_virt(alloc_xen_pagetable());
        clear_page(pl2e);
        *pl3e = l3e_from_paddr(__pa(pl2e), __PAGE_HYPERVISOR);
    }

    pl2e = l3e_to_l2e(*pl3e) + l2_table_offset(v);
    return pl2e;
}

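/*
 * Build the x86_64 Xen mappings: a read-only, guest-visible view of the
 * machine-to-phys table (MPT), the writable MPT mapping used by Xen itself,
 * and the linear page-table slot in the idle L4 table.
 */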
void __init paging_init(void)
{
    unsigned long i, mpt_size;
    l3_pgentry_t *l3_ro_mpt;
    l2_pgentry_t *l2_ro_mpt;
    struct pfn_info *pg;

    idle0_vcpu.arch.monitor_table = mk_pagetable(__pa(idle_pg_table));

    /* Create user-accessible L2 directory to map the MPT for guests. */
    l3_ro_mpt = alloc_xenheap_page();
    clear_page(l3_ro_mpt);
    idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)] =
        l4e_from_page(
            virt_to_page(l3_ro_mpt), __PAGE_HYPERVISOR | _PAGE_USER);
    l2_ro_mpt = alloc_xenheap_page();
    clear_page(l2_ro_mpt);
    l3_ro_mpt[l3_table_offset(RO_MPT_VIRT_START)] =
        l3e_from_page(
            virt_to_page(l2_ro_mpt), __PAGE_HYPERVISOR | _PAGE_USER);
    l2_ro_mpt += l2_table_offset(RO_MPT_VIRT_START);

    /*
     * Allocate and map the machine-to-phys table.
     * This also ensures L3 is present for fixmaps.
     */
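    /*
     * The table is sized at 4 bytes per machine frame (max_page * 4) and
     * rounded up to a 2MB boundary so it can be mapped with the 2MB
     * superpage entries installed below.
     */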
    mpt_size = (max_page * 4) + (1UL << L2_PAGETABLE_SHIFT) - 1UL;
    mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
    for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
    {
        if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL )
            panic("Not enough memory for m2p table\n");
        map_pages_to_xen(
            RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT), page_to_pfn(pg),
            1UL << PAGETABLE_ORDER,
            PAGE_HYPERVISOR);
        memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)), 0x55,
               1UL << L2_PAGETABLE_SHIFT);
        *l2_ro_mpt++ = l2e_from_page(
            pg, _PAGE_GLOBAL|_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT);
        BUG_ON(((unsigned long)l2_ro_mpt & ~PAGE_MASK) == 0);
    }

    /* Set up linear page table mapping. */
    idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)] =
        l4e_from_paddr(__pa(idle_pg_table), __PAGE_HYPERVISOR);
}

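/*
 * Clear L4 slot 0, which covers low virtual addresses (including the
 * boot-time mapping of low memory), and flush the TLB, including global
 * entries, so the stale translations disappear.
 */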
void __init zap_low_mappings(void)
{
    idle_pg_table[0] = l4e_empty();
    flush_tlb_all_pge();
}

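/*
 * x86_64-specific memory setup: sanity-check the 'struct pfn_info' layout
 * and hand the M2P table frames to 'dom_xen' so that guests can map the
 * read-only MPT view.
 */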
void subarch_init_memory(struct domain *dom_xen)
{
    unsigned long i, v, m2p_start_mfn;
    l3_pgentry_t l3e;
    l2_pgentry_t l2e;

    /*
     * We are rather picky about the layout of 'struct pfn_info'. The
     * count_info and domain fields must be adjacent, as we perform atomic
     * 64-bit operations on them.
     */
    if ( (offsetof(struct pfn_info, u.inuse._domain) !=
          (offsetof(struct pfn_info, count_info) + sizeof(u32))) )
    {
        printk("Weird pfn_info layout (%ld,%ld,%ld)\n",
               offsetof(struct pfn_info, count_info),
               offsetof(struct pfn_info, u.inuse._domain),
               sizeof(struct pfn_info));
        for ( ; ; ) ;
    }

    /* M2P table is mappable read-only by privileged domains. */
    for ( v = RDWR_MPT_VIRT_START;
          v != RDWR_MPT_VIRT_END;
          v += 1 << L2_PAGETABLE_SHIFT )
    {
        l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
            l3_table_offset(v)];
        if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
            continue;
        l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
        if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
            continue;
        m2p_start_mfn = l2e_get_pfn(l2e);

        for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
        {
            frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1;
            /* Use the GDT page type so that non-privileged domains can only
               map these frames read-only. */
            frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1;
            page_set_owner(&frame_table[m2p_start_mfn+i], dom_xen);
        }
    }
}

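/*
 * Hypercall: record the stack segment/pointer for Xen to load when switching
 * to the guest kernel stack. The selector must have RPL 3, since x86_64
 * guest kernels run in ring 3.
 */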
long do_stack_switch(unsigned long ss, unsigned long esp)
{
    if ( (ss & 3) != 3 )
        return -EPERM;
    current->arch.guest_context.kernel_ss = ss;
    current->arch.guest_context.kernel_sp = esp;
    return 0;
}

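/*
 * Hypercall: update one of the guest's FS/GS base registers via the
 * corresponding MSR. A write that the hardware rejects (e.g. a non-canonical
 * base) makes wrmsr_user() return non-zero and is reported as -EFAULT.
 */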
long do_set_segment_base(unsigned int which, unsigned long base)
{
    struct vcpu *v = current;
    long ret = 0;

    switch ( which )
    {
    case SEGBASE_FS:
        if ( wrmsr_user(MSR_FS_BASE, base, base>>32) )
            ret = -EFAULT;
        else
            v->arch.guest_context.fs_base = base;
        break;

    case SEGBASE_GS_USER:
        if ( wrmsr_user(MSR_SHADOW_GS_BASE, base, base>>32) )
            ret = -EFAULT;
        else
            v->arch.guest_context.gs_base_user = base;
        break;

    case SEGBASE_GS_KERNEL:
        if ( wrmsr_user(MSR_GS_BASE, base, base>>32) )
            ret = -EFAULT;
        else
            v->arch.guest_context.gs_base_kernel = base;
        break;

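    /*
     * SEGBASE_GS_USER_SEL loads a new %gs selector on the guest's behalf.
     * The selector load clobbers the active GS base, so swap the user GS
     * base in first, load the selector (a faulting load is fixed up to a
     * null selector), then swap Xen's own GS base back in.
     */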
    case SEGBASE_GS_USER_SEL:
        __asm__ __volatile__ (
            "     swapgs              \n"
            "1:   movl %k0,%%gs       \n"
            "    "safe_swapgs"        \n"
            ".section .fixup,\"ax\"   \n"
            "2:   xorl %k0,%k0        \n"
            "     jmp  1b             \n"
            ".previous                \n"
            ".section __ex_table,\"a\"\n"
            "    .align 8             \n"
            "    .quad 1b,2b          \n"
            ".previous                "
            : : "r" (base&0xffff) );
        break;

    default:
        ret = -EINVAL;
        break;
    }

    return ret;
}

/* Returns TRUE if given descriptor is valid for GDT or LDT. */
int check_descriptor(struct desc_struct *d)
{
    u32 a = d->a, b = d->b;

    /* A not-present descriptor will always fault, so is safe. */
    if ( !(b & _SEGMENT_P) )
        goto good;

    /* The guest can only safely be executed in ring 3. */
    if ( (b & _SEGMENT_DPL) != _SEGMENT_DPL )
        goto bad;

    /* All code and data segments are okay. No base/limit checking. */
    if ( (b & _SEGMENT_S) )
        goto good;

    /* Invalid type 0 is harmless. It is used for 2nd half of a call gate. */
    if ( (b & _SEGMENT_TYPE) == 0x000 )
        goto good;

    /* Everything but a call gate is discarded here. */
    if ( (b & _SEGMENT_TYPE) != 0xc00 )
        goto bad;

    /* Can't allow far jump to a Xen-private segment. */
    if ( !VALID_CODESEL(a>>16) )
        goto bad;

    /* Reserved bits must be zero. */
    if ( (b & 0xe0) != 0 )
        goto bad;

 good:
    return 1;
 bad:
    return 0;
}

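/*
 * Guard two pages starting one page above the base of the stack allocation,
 * so that a deep stack overrun hits an unmapped guard page and faults
 * instead of silently corrupting adjacent data; the lowest page is left
 * mapped.
 */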
void memguard_guard_stack(void *p)
{
    p = (void *)((unsigned long)p + PAGE_SIZE);
    memguard_guard_range(p, 2 * PAGE_SIZE);
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */