ia64/xen-unstable

view xen/include/asm-x86/paging.h @ 15812:86a154e1ef5d

[HVM] Shadow: don't shadow the p2m table.
For HVM vcpus with paging disabled, we used to shadow the p2m table,
and skip the p2m lookup to go from gfn to mfn. Instead, we now
provide a simple pagetable that gives a one-to-one mapping of 4GB, and
shadow that, making the translations from gfn to mfn via the p2m.
This removes the paging-disabled special-case code from the shadow
fault handler, and allows us to expand the p2m interface, since all HVM
translations now go through the same p2m lookups.
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
author Tim Deegan <Tim.Deegan@xensource.com>
date Fri Aug 31 11:06:22 2007 +0100 (2007-08-31)
parents b8e8061c5a98
children db9f62d8f7f4

/******************************************************************************
 * include/asm-x86/paging.h
 *
 * Common interface for paging support
 * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
 * Parts of this code are Copyright (c) 2006 by XenSource Inc.
 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#ifndef _XEN_PAGING_H
#define _XEN_PAGING_H

#include <xen/mm.h>
#include <public/domctl.h>
#include <xen/sched.h>
#include <xen/perfc.h>
#include <xen/domain_page.h>
#include <asm/flushtlb.h>
#include <asm/domain.h>

/*****************************************************************************
 * Macros to tell which paging mode a domain is in */

#define PG_SH_shift    20
#define PG_HAP_shift   21
/* We're in one of the shadow modes */
#define PG_SH_enable   (1U << PG_SH_shift)
#define PG_HAP_enable  (1U << PG_HAP_shift)

/* common paging mode bits */
#define PG_mode_shift  10
/* Refcounts based on shadow tables instead of guest tables */
#define PG_refcounts   (XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT << PG_mode_shift)
/* Enable log dirty mode */
#define PG_log_dirty   (XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY << PG_mode_shift)
/* Xen does p2m translation, not guest */
#define PG_translate   (XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE << PG_mode_shift)
/* Xen does not steal address space from the domain for its own booking;
 * requires VT or similar mechanisms */
#define PG_external   (XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL << PG_mode_shift)

#define paging_mode_enabled(_d)   ((_d)->arch.paging.mode)
#define paging_mode_shadow(_d)    ((_d)->arch.paging.mode & PG_SH_enable)
#define paging_mode_hap(_d)       ((_d)->arch.paging.mode & PG_HAP_enable)

#define paging_mode_refcounts(_d) ((_d)->arch.paging.mode & PG_refcounts)
#define paging_mode_log_dirty(_d) ((_d)->arch.paging.mode & PG_log_dirty)
#define paging_mode_translate(_d) ((_d)->arch.paging.mode & PG_translate)
#define paging_mode_external(_d)  ((_d)->arch.paging.mode & PG_external)
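
/* Illustrative usage sketch (not part of the original header): the
 * predicates above just test bits of d->arch.paging.mode, so callers can
 * combine them freely.  For example, code that only records dirtying when
 * log-dirty mode is active might look like:
 *
 *     if ( paging_mode_log_dirty(d) )
 *         paging_mark_dirty(d, mfn);
 *
 * where "mfn" is the machine frame number (an unsigned long) of the page
 * being written, as taken by paging_mark_dirty() declared below. */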

/* flags used for paging debug */
#define PAGING_DEBUG_LOGDIRTY 0

/*****************************************************************************
 * Mode-specific entry points into the shadow code.
 *
 * These shouldn't be used directly by callers; rather use the functions
 * below which will indirect through this table as appropriate. */

struct sh_emulate_ctxt;
struct shadow_paging_mode {
    void          (*detach_old_tables     )(struct vcpu *v);
    int           (*x86_emulate_write     )(struct vcpu *v, unsigned long va,
                                            void *src, u32 bytes,
                                            struct sh_emulate_ctxt *sh_ctxt);
    int           (*x86_emulate_cmpxchg   )(struct vcpu *v, unsigned long va,
                                            unsigned long old,
                                            unsigned long new,
                                            unsigned int bytes,
                                            struct sh_emulate_ctxt *sh_ctxt);
    int           (*x86_emulate_cmpxchg8b )(struct vcpu *v, unsigned long va,
                                            unsigned long old_lo,
                                            unsigned long old_hi,
                                            unsigned long new_lo,
                                            unsigned long new_hi,
                                            struct sh_emulate_ctxt *sh_ctxt);
    mfn_t         (*make_monitor_table    )(struct vcpu *v);
    void          (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn);
    int           (*guess_wrmap           )(struct vcpu *v,
                                            unsigned long vaddr, mfn_t gmfn);
    /* For outsiders to tell what mode we're in */
    unsigned int shadow_levels;
};


/************************************************/
/*          common paging interface             */
/************************************************/
struct paging_mode {
    int           (*page_fault            )(struct vcpu *v, unsigned long va,
                                            struct cpu_user_regs *regs);
    int           (*invlpg                )(struct vcpu *v, unsigned long va);
    unsigned long (*gva_to_gfn            )(struct vcpu *v, unsigned long va);
    void          (*update_cr3            )(struct vcpu *v, int do_locking);
    void          (*update_paging_modes   )(struct vcpu *v);
    void          (*write_p2m_entry       )(struct vcpu *v, unsigned long gfn,
                                            l1_pgentry_t *p, mfn_t table_mfn,
                                            l1_pgentry_t new,
                                            unsigned int level);
    int           (*write_guest_entry     )(struct vcpu *v, intpte_t *p,
                                            intpte_t new, mfn_t gmfn);
    int           (*cmpxchg_guest_entry   )(struct vcpu *v, intpte_t *p,
                                            intpte_t *old, intpte_t new,
                                            mfn_t gmfn);
    void *        (*guest_map_l1e         )(struct vcpu *v, unsigned long va,
                                            unsigned long *gl1mfn);
    void          (*guest_get_eff_l1e     )(struct vcpu *v, unsigned long va,
                                            void *eff_l1e);
    unsigned int guest_levels;

    /* paging support extension */
    struct shadow_paging_mode shadow;
};

/*****************************************************************************
 * Log dirty code */

/* allocate log dirty bitmap resource for recording dirty pages */
int paging_alloc_log_dirty_bitmap(struct domain *d);

/* free log dirty bitmap resource */
void paging_free_log_dirty_bitmap(struct domain *d);

/* enable log dirty */
int paging_log_dirty_enable(struct domain *d);

/* disable log dirty */
int paging_log_dirty_disable(struct domain *d);

/* log dirty initialization */
void paging_log_dirty_init(struct domain *d,
                           int  (*enable_log_dirty)(struct domain *d),
                           int  (*disable_log_dirty)(struct domain *d),
                           void (*clean_dirty_bitmap)(struct domain *d));

/* mark a page as dirty */
void paging_mark_dirty(struct domain *d, unsigned long guest_mfn);
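
/* Usage sketch (the handler names below are hypothetical, for illustration
 * only): each paging-assistance implementation registers its own log-dirty
 * handlers once at domain-init time, roughly:
 *
 *     paging_log_dirty_init(d, my_enable_log_dirty,
 *                              my_disable_log_dirty,
 *                              my_clean_dirty_bitmap);
 *
 * after which paging_log_dirty_enable()/_disable() dispatch to those
 * callbacks and paging_mark_dirty() records pages in the common bitmap. */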

/*****************************************************************************
 * Entry points into the paging-assistance code */

/* Initialize the paging resource for vcpu struct. It is called by
 * vcpu_initialise() in domain.c */
void paging_vcpu_init(struct vcpu *v);

/* Set up the paging-assistance-specific parts of a domain struct at
 * start of day.  Called for every domain from arch_domain_create() */
void paging_domain_init(struct domain *d);

/* Handler for paging-control ops: operations from user-space to enable
 * and disable ephemeral shadow modes (test mode and log-dirty mode) and
 * manipulate the log-dirty bitmap. */
int paging_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
                  XEN_GUEST_HANDLE(void) u_domctl);

/* Call when destroying a domain */
void paging_teardown(struct domain *d);

/* Call once all of the references to the domain have gone away */
void paging_final_teardown(struct domain *d);

/* Enable an arbitrary paging-assistance mode.  Call once at domain
 * creation. */
int paging_enable(struct domain *d, u32 mode);


/* Page fault handler
 * Called from pagefault handler in Xen, and from the HVM trap handlers
 * for pagefaults.  Returns 1 if this fault was an artefact of the
 * paging code (and the guest should retry) or 0 if it is not (and the
 * fault should be handled elsewhere or passed to the guest).
 *
 * Note: under shadow paging, this function handles all page faults;
 * however, for hardware-assisted paging, this function handles only
 * host page faults (i.e. nested page faults). */
static inline int
paging_fault(unsigned long va, struct cpu_user_regs *regs)
{
    struct vcpu *v = current;
    return v->arch.paging.mode->page_fault(v, va, regs);
}
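
/* Caller sketch (illustrative; "addr" and "regs" stand for the faulting
 * address and register state from the trap frame): a page-fault handler
 * consults the paging code first and handles the fault further only if
 * the paging code did not cause it:
 *
 *     if ( paging_mode_enabled(current->domain) &&
 *          paging_fault(addr, regs) )
 *         return;
 *
 * A return value of 1 means the fault was an artefact of the paging code
 * and the guest should simply retry the access. */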

/* Handle invlpg requests on vcpus.
 * Returns 1 if the invlpg instruction should be issued on the hardware,
 * or 0 if it's safe not to do so. */
static inline int paging_invlpg(struct vcpu *v, unsigned long va)
{
    return v->arch.paging.mode->invlpg(v, va);
}
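
/* Usage sketch (the TLB-flush helper name below is an assumption and is
 * not declared in this header): an INVLPG emulation path flushes the
 * hardware TLB entry only when the paging code asks for it:
 *
 *     if ( paging_invlpg(v, va) )
 *         flush_tlb_one_local(va);
 */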

/* Translate a guest virtual address to the frame number that the
 * *guest* pagetables would map it to.  Returns INVALID_GFN if the guest
 * tables don't map this address. */
#define INVALID_GFN (-1UL)
static inline unsigned long paging_gva_to_gfn(struct vcpu *v, unsigned long va)
{
    return v->arch.paging.mode->gva_to_gfn(v, va);
}
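
/* Usage sketch (assumes a gfn-to-mfn lookup such as gfn_to_mfn() from
 * asm/p2m.h, which this header does not include): translating a guest
 * virtual address all the way to a machine frame is a two-step process:
 *
 *     unsigned long gfn = paging_gva_to_gfn(v, va);
 *     if ( gfn != INVALID_GFN )
 *         mfn = gfn_to_mfn(v->domain, gfn);
 */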

/* Update all the things that are derived from the guest's CR3.
 * Called when the guest changes CR3; the caller can then use v->arch.cr3
 * as the value to load into the host CR3 to schedule this vcpu */
static inline void paging_update_cr3(struct vcpu *v)
{
    v->arch.paging.mode->update_cr3(v, 1);
}

/* Update all the things that are derived from the guest's CR0/CR3/CR4.
 * Called to initialize paging structures if the paging mode
 * has changed, and when bringing up a VCPU for the first time. */
static inline void paging_update_paging_modes(struct vcpu *v)
{
    v->arch.paging.mode->update_paging_modes(v);
}


/* Write a new value into the guest pagetable, and update the
 * paging-assistance state appropriately.  Returns 0 if we page-faulted,
 * 1 for success. */
static inline int paging_write_guest_entry(struct vcpu *v, intpte_t *p,
                                           intpte_t new, mfn_t gmfn)
{
    if ( unlikely(paging_mode_enabled(v->domain)
                  && v->arch.paging.mode != NULL) )
        return v->arch.paging.mode->write_guest_entry(v, p, new, gmfn);
    else
        return (!__copy_to_user(p, &new, sizeof(new)));
}


/* Cmpxchg a new value into the guest pagetable, and update the
 * paging-assistance state appropriately.  Returns 0 if we page-faulted,
 * 1 if not.  N.B. caller should check the value of "old" to see if the
 * cmpxchg itself was successful. */
static inline int paging_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p,
                                             intpte_t *old, intpte_t new,
                                             mfn_t gmfn)
{
    if ( unlikely(paging_mode_enabled(v->domain)
                  && v->arch.paging.mode != NULL) )
        return v->arch.paging.mode->cmpxchg_guest_entry(v, p, old, new, gmfn);
    else
        return (!cmpxchg_user(p, *old, new));
}
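
/* Usage sketch (illustrative): the return value only reports whether the
 * guest entry could be accessed at all; whether the exchange actually
 * happened is decided by re-checking "old" afterwards:
 *
 *     intpte_t t = old;
 *     if ( !paging_cmpxchg_guest_entry(v, p, &t, new, gmfn) )
 *         return 0;
 *     okay = (t == old);
 *
 * On return, "t" holds the value found in the entry, so equality with the
 * expected "old" value means the new value was written. */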

/* Helper function that writes a pte in such a way that a concurrent read
 * never sees a half-written entry that has _PAGE_PRESENT set */
static inline void safe_write_pte(l1_pgentry_t *p, l1_pgentry_t new)
{
#if CONFIG_PAGING_LEVELS == 3
    /* PAE machines write 64bit PTEs as two 32bit writes. */
    volatile unsigned long *d = (unsigned long *) p;
    unsigned long *s = (unsigned long *) &new;
    BUILD_BUG_ON(sizeof (l1_pgentry_t) != 2 * sizeof (unsigned long));
    d[0] = 0;
    d[1] = s[1];
    d[0] = s[0];
#else
    *p = new;
#endif
}

/* Atomically write a P2M entry and update the paging-assistance state
 * appropriately.
 * Arguments: the domain in question, the GFN whose mapping is being updated,
 * a pointer to the entry to be written, the MFN in which the entry resides,
 * the new contents of the entry, and the level in the p2m tree at which
 * we are writing. */
static inline void paging_write_p2m_entry(struct domain *d, unsigned long gfn,
                                          l1_pgentry_t *p, mfn_t table_mfn,
                                          l1_pgentry_t new, unsigned int level)
{
    struct vcpu *v = current;
    if ( v->domain != d )
        v = d->vcpu[0];
    if ( likely(v && paging_mode_enabled(d) && v->arch.paging.mode != NULL) )
    {
        return v->arch.paging.mode->write_p2m_entry(v, gfn, p, table_mfn,
                                                    new, level);
    }
    else
        safe_write_pte(p, new);
}

/* Print paging-assistance info to the console */
void paging_dump_domain_info(struct domain *d);
void paging_dump_vcpu_info(struct vcpu *v);


/*****************************************************************************
 * Access to the guest pagetables */

/* Get a mapping of a PV guest's l1e for this virtual address. */
static inline void *
guest_map_l1e(struct vcpu *v, unsigned long addr, unsigned long *gl1mfn)
{
    l2_pgentry_t l2e;

    if ( unlikely(paging_mode_translate(v->domain)) )
        return v->arch.paging.mode->guest_map_l1e(v, addr, gl1mfn);

    /* Find this l1e and its enclosing l1mfn in the linear map */
    if ( __copy_from_user(&l2e,
                          &__linear_l2_table[l2_linear_offset(addr)],
                          sizeof(l2_pgentry_t)) != 0 )
        return NULL;
    /* Check flags that it will be safe to read the l1e */
    if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE))
         != _PAGE_PRESENT )
        return NULL;
    *gl1mfn = l2e_get_pfn(l2e);
    return &__linear_l1_table[l1_linear_offset(addr)];
}

/* Pull down the mapping we got from guest_map_l1e() */
static inline void
guest_unmap_l1e(struct vcpu *v, void *p)
{
    if ( unlikely(paging_mode_translate(v->domain)) )
        unmap_domain_page(p);
}
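
/* Usage sketch (illustrative): every successful guest_map_l1e() must be
 * paired with guest_unmap_l1e(), because in translate mode the pointer is
 * a mapped domain page rather than a linear-map address:
 *
 *     unsigned long gl1mfn;
 *     l1_pgentry_t *pl1e = guest_map_l1e(v, addr, &gl1mfn);
 *     if ( pl1e != NULL )
 *     {
 *         l1_pgentry_t gl1e = *pl1e;
 *         guest_unmap_l1e(v, pl1e);
 *         ... use gl1e and gl1mfn ...
 *     }
 *
 * In the non-translate case the pointer lies in the guest linear map, so
 * real callers must be prepared for the read through it to fault. */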

/* Read the guest's l1e that maps this address. */
static inline void
guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e)
{
    if ( likely(!paging_mode_translate(v->domain)) )
    {
        ASSERT(!paging_mode_external(v->domain));
        if ( __copy_from_user(eff_l1e,
                              &__linear_l1_table[l1_linear_offset(addr)],
                              sizeof(l1_pgentry_t)) != 0 )
            *(l1_pgentry_t *)eff_l1e = l1e_empty();
        return;
    }

    v->arch.paging.mode->guest_get_eff_l1e(v, addr, eff_l1e);
}

/* Read the guest's l1e that maps this address, from the kernel-mode
 * pagetables. */
static inline void
guest_get_eff_kern_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e)
{
#if defined(__x86_64__)
    int user_mode = !(v->arch.flags & TF_kernel_mode);
#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
#else
#define TOGGLE_MODE() ((void)0)
#endif

    TOGGLE_MODE();
    guest_get_eff_l1e(v, addr, eff_l1e);
    TOGGLE_MODE();
}


#endif /* XEN_PAGING_H */

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */