ia64/xen-unstable

view xen/arch/x86/x86_64/mm.c @ 8071:5d95ab80f290

Fix x86_64 build.

Signed-off-by: Steven Hand <steven@xensource.com>
author smh22@firebug.cl.cam.ac.uk
date Sat Nov 26 10:24:08 2005 +0100 (2005-11-26)
parents c3cfc4ff3b08
children b05e1c4bc31b
line source
/******************************************************************************
 * arch/x86/x86_64/mm.c
 *
 * Modifications to Linux original are copyright (c) 2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc., 59
 * Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/config.h>
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/sched.h>
#include <asm/current.h>
#include <asm/asm_defns.h>
#include <asm/page.h>
#include <asm/flushtlb.h>
#include <asm/fixmap.h>
#include <asm/msr.h>
#include <public/memory.h>
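
/*
 * Allocate a page for use as a Xen page table. During early boot the page
 * comes from the boot allocator; thereafter it is taken from the domain
 * heap. Returns NULL on failure.
 */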
struct pfn_info *alloc_xen_pagetable(void)
{
    extern int early_boot;
    unsigned long pfn;

    if ( !early_boot )
        return alloc_domheap_page(NULL);

    pfn = alloc_boot_pages(1, 1);
    return ((pfn == 0) ? NULL : pfn_to_page(pfn));
}

void free_xen_pagetable(struct pfn_info *pg)
{
    free_domheap_page(pg);
}
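
/*
 * Return a pointer to the L2 entry in the idle (Xen) page table that covers
 * virtual address 'v', allocating and hooking up intermediate L3/L2 tables
 * if they are not yet present.
 */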
l2_pgentry_t *virt_to_xen_l2e(unsigned long v)
{
    l4_pgentry_t *pl4e;
    l3_pgentry_t *pl3e;
    l2_pgentry_t *pl2e;

    pl4e = &idle_pg_table[l4_table_offset(v)];
    if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) )
    {
        pl3e = page_to_virt(alloc_xen_pagetable());
        clear_page(pl3e);
        *pl4e = l4e_from_paddr(__pa(pl3e), __PAGE_HYPERVISOR);
    }

    pl3e = l4e_to_l3e(*pl4e) + l3_table_offset(v);
    if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
    {
        pl2e = page_to_virt(alloc_xen_pagetable());
        clear_page(pl2e);
        *pl3e = l3e_from_paddr(__pa(pl2e), __PAGE_HYPERVISOR);
    }

    pl2e = l3e_to_l2e(*pl3e) + l2_table_offset(v);
    return pl2e;
}
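
/*
 * Set up the x86-64 Xen mappings: a read-only, guest-visible view of the
 * machine-to-phys table, the read-write M2P table itself, and the linear
 * page-table slot.
 */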
void __init paging_init(void)
{
    unsigned long i, mpt_size;
    l3_pgentry_t *l3_ro_mpt;
    l2_pgentry_t *l2_ro_mpt;
    struct pfn_info *pg;

    idle0_vcpu.arch.monitor_table = mk_pagetable(__pa(idle_pg_table));

    /* Create user-accessible L2 directory to map the MPT for guests. */
    l3_ro_mpt = alloc_xenheap_page();
    clear_page(l3_ro_mpt);
    idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)] =
        l4e_from_page(
            virt_to_page(l3_ro_mpt), __PAGE_HYPERVISOR | _PAGE_USER);
    l2_ro_mpt = alloc_xenheap_page();
    clear_page(l2_ro_mpt);
    l3_ro_mpt[l3_table_offset(RO_MPT_VIRT_START)] =
        l3e_from_page(
            virt_to_page(l2_ro_mpt), __PAGE_HYPERVISOR | _PAGE_USER);
    l2_ro_mpt += l2_table_offset(RO_MPT_VIRT_START);

    /*
     * Allocate and map the machine-to-phys table.
     * This also ensures L3 is present for fixmaps.
     */
    mpt_size = (max_page * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1;
    mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
    for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
    {
        if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL )
            panic("Not enough memory for m2p table\n");
        map_pages_to_xen(
            RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT), page_to_pfn(pg),
            1UL << PAGETABLE_ORDER,
            PAGE_HYPERVISOR);
        memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)), 0x55,
               1UL << L2_PAGETABLE_SHIFT);
        *l2_ro_mpt++ = l2e_from_page(
            pg, _PAGE_GLOBAL|_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT);
        BUG_ON(((unsigned long)l2_ro_mpt & ~PAGE_MASK) == 0);
    }

    /* Set up linear page table mapping. */
    idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)] =
        l4e_from_paddr(__pa(idle_pg_table), __PAGE_HYPERVISOR);
}
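
/* Remove the boot-time identity mapping of low physical memory. */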
void __init zap_low_mappings(void)
{
    idle_pg_table[0] = l4e_empty();
    flush_tlb_all_pge();
}
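
/*
 * x86-64-specific memory setup: sanity-check the 'struct pfn_info' layout
 * and hand the M2P table frames to dom_xen so that guests may map them
 * read-only.
 */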
void subarch_init_memory(struct domain *dom_xen)
{
    unsigned long i, v, m2p_start_mfn;
    l3_pgentry_t l3e;
    l2_pgentry_t l2e;

    /*
     * We are rather picky about the layout of 'struct pfn_info'. The
     * count_info and domain fields must be adjacent, as we perform atomic
     * 64-bit operations on them.
     */
    if ( ((offsetof(struct pfn_info, u.inuse._domain) !=
           (offsetof(struct pfn_info, count_info) + sizeof(u32)))) ||
         ((offsetof(struct pfn_info, count_info) & 7) != 0) ||
         (sizeof(struct pfn_info) != 40) )
    {
        printk("Weird pfn_info layout (%ld,%ld,%ld)\n",
               offsetof(struct pfn_info, count_info),
               offsetof(struct pfn_info, u.inuse._domain),
               sizeof(struct pfn_info));
        for ( ; ; ) ;
    }

    /* M2P table is mappable read-only by privileged domains. */
    for ( v = RDWR_MPT_VIRT_START;
          v != RDWR_MPT_VIRT_END;
          v += 1 << L2_PAGETABLE_SHIFT )
    {
        l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
            l3_table_offset(v)];
        if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
            continue;
        l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
        if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
            continue;
        m2p_start_mfn = l2e_get_pfn(l2e);

        for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
        {
            frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1;
            /* Use the GDT page type so the frame can only ever be mapped
               read-only by non-privileged domains. */
            frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1;
            page_set_owner(&frame_table[m2p_start_mfn+i], dom_xen);
        }
    }
}
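
/*
 * x86-64-specific memory hypercalls. XENMEM_machphys_mfn_list returns the
 * list of MFNs backing the 2MB superpage mappings of the machine-to-phys
 * table, up to the caller-supplied maximum number of extents.
 */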
long arch_memory_op(int op, void *arg)
{
    struct xen_machphys_mfn_list xmml;
    l3_pgentry_t l3e;
    l2_pgentry_t l2e;
    unsigned long mfn, v;
    unsigned int i;
    long rc = 0;

    switch ( op )
    {
    case XENMEM_machphys_mfn_list:
        if ( copy_from_user(&xmml, arg, sizeof(xmml)) )
            return -EFAULT;

        for ( i = 0, v = RDWR_MPT_VIRT_START; v != RDWR_MPT_VIRT_END;
              i++, v += 1 << 21 )
        {
            l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
                l3_table_offset(v)];
            if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
                break;
            l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
            if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
                break;
            mfn = l2e_get_pfn(l2e) + l1_table_offset(v);
            if ( i == xmml.max_extents )
                break;
            if ( put_user(mfn, &xmml.extent_start[i]) )
                return -EFAULT;
        }

        if ( put_user(i, &((struct xen_machphys_mfn_list *)arg)->nr_extents) )
            return -EFAULT;

        break;

    default:
        rc = -ENOSYS;
        break;
    }

    return rc;
}
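
/*
 * stack_switch hypercall: record the stack segment/pointer that should be
 * loaded on entry to the guest's kernel. The selector must have RPL 3.
 */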
long do_stack_switch(unsigned long ss, unsigned long esp)
{
    if ( (ss & 3) != 3 )
        return -EPERM;
    current->arch.guest_context.kernel_ss = ss;
    current->arch.guest_context.kernel_sp = esp;
    return 0;
}
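
/*
 * set_segment_base hypercall: update the FS/GS base MSRs on behalf of the
 * guest, or reload the user %gs selector (which lives in the inactive GS
 * context, hence the swapgs dance below).
 */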
long do_set_segment_base(unsigned int which, unsigned long base)
{
    struct vcpu *v = current;
    long ret = 0;

    switch ( which )
    {
    case SEGBASE_FS:
        if ( wrmsr_user(MSR_FS_BASE, base, base>>32) )
            ret = -EFAULT;
        else
            v->arch.guest_context.fs_base = base;
        break;

    case SEGBASE_GS_USER:
        if ( wrmsr_user(MSR_SHADOW_GS_BASE, base, base>>32) )
            ret = -EFAULT;
        else
            v->arch.guest_context.gs_base_user = base;
        break;

    case SEGBASE_GS_KERNEL:
        if ( wrmsr_user(MSR_GS_BASE, base, base>>32) )
            ret = -EFAULT;
        else
            v->arch.guest_context.gs_base_kernel = base;
        break;

    case SEGBASE_GS_USER_SEL:
        __asm__ __volatile__ (
            "    swapgs               \n"
            "1:  movl %k0,%%gs        \n"
            "    "safe_swapgs"        \n"
            ".section .fixup,\"ax\"   \n"
            "2:  xorl %k0,%k0         \n"
            "    jmp 1b               \n"
            ".previous                \n"
            ".section __ex_table,\"a\"\n"
            "    .align 8             \n"
            "    .quad 1b,2b          \n"
            ".previous                "
            : : "r" (base&0xffff) );
        break;

    default:
        ret = -EINVAL;
        break;
    }

    return ret;
}

/* Returns TRUE if given descriptor is valid for GDT or LDT. */
int check_descriptor(struct desc_struct *d)
{
    u32 a = d->a, b = d->b;

    /* A not-present descriptor will always fault, so is safe. */
    if ( !(b & _SEGMENT_P) )
        goto good;

    /* The guest can only safely be executed in ring 3. */
    if ( (b & _SEGMENT_DPL) != _SEGMENT_DPL )
        goto bad;

    /* All code and data segments are okay. No base/limit checking. */
    if ( (b & _SEGMENT_S) )
        goto good;

    /* Invalid type 0 is harmless. It is used for 2nd half of a call gate. */
    if ( (b & _SEGMENT_TYPE) == 0x000 )
        goto good;

    /* Everything but a call gate is discarded here. */
    if ( (b & _SEGMENT_TYPE) != 0xc00 )
        goto bad;

    /* Can't allow far jump to a Xen-private segment. */
    if ( !VALID_CODESEL(a>>16) )
        goto bad;

    /* Reserved bits must be zero. */
    if ( (b & 0xe0) != 0 )
        goto bad;

 good:
    return 1;
 bad:
    return 0;
}
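
/*
 * Guard two pages of the stack allocation, starting one page above its
 * base, so that runaway stack usage faults on the guard region rather than
 * silently corrupting adjacent memory.
 */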
void memguard_guard_stack(void *p)
{
    p = (void *)((unsigned long)p + PAGE_SIZE);
    memguard_guard_range(p, 2 * PAGE_SIZE);
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */