ia64/xen-unstable: xen/arch/x86/mm/shadow/multi.c @ 16310:46f91ed0f7d1

Live migration with MMIO pages: fix shadow pte propagation.
Signed-off-by: Kieran Mansley <kmansley@solarflare.com>

author     Keir Fraser <keir@xensource.com>
date       Fri Nov 02 10:37:59 2007 +0000 (2007-11-02)
parents    9379c83e14b5
children   db9f62d8f7f4
1 /******************************************************************************
2 * arch/x86/mm/shadow/multi.c
3 *
4 * Simple, mostly-synchronous shadow page tables.
5 * Parts of this code are Copyright (c) 2006 by XenSource Inc.
6 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
7 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
24 #include <xen/config.h>
25 #include <xen/types.h>
26 #include <xen/mm.h>
27 #include <xen/trace.h>
28 #include <xen/sched.h>
29 #include <xen/perfc.h>
30 #include <xen/domain_page.h>
31 #include <asm/page.h>
32 #include <asm/current.h>
33 #include <asm/shadow.h>
34 #include <asm/flushtlb.h>
35 #include <asm/hvm/hvm.h>
36 #include <asm/hvm/cacheattr.h>
37 #include <asm/mtrr.h>
38 #include "private.h"
39 #include "types.h"
41 /* THINGS TO DO LATER:
42 *
43 * TEARDOWN HEURISTICS
44 * Also: have a heuristic for when to destroy a previous paging-mode's
45 * shadows. When a guest is done with its start-of-day 32-bit tables
46 * and reuses the memory we want to drop those shadows. Start with
47 * shadows in a page in two modes as a hint, but beware of clever tricks
48 * like reusing a pagetable for both PAE and 64-bit during boot...
49 *
50 * PAE LINEAR MAPS
51 * Rework shadow_get_l*e() to have the option of using map_domain_page()
52 * instead of linear maps. Add appropriate unmap_l*e calls in the users.
53 * Then we can test the speed difference made by linear maps. If the
54 * map_domain_page() version is OK on PAE, we could maybe allow a lightweight
55 * l3-and-l2h-only shadow mode for PAE PV guests that would allow them
56 * to share l2h pages again.
57 *
58 * GUEST_WALK_TABLES TLB FLUSH COALESCE
59 * guest_walk_tables can do up to three remote TLB flushes as it walks to
60 * the first l1 of a new pagetable. Should coalesce the flushes to the end,
61 * and if we do flush, re-do the walk. If anything has changed, then
62 * pause all the other vcpus and do the walk *again*.
63 *
64 * WP DISABLED
65 * Consider how to implement having the WP bit of CR0 set to 0.
66 * Since we need to be able to cause write faults to pagetables, this might
67 * end up looking like not having the (guest) pagetables present at all in
68 * HVM guests...
69 *
70 * PSE disabled / PSE36
71 * We don't support any modes other than PSE enabled, PSE36 disabled.
72 * Neither of those would be hard to change, but we'd need to be able to
73 * deal with shadows made in one mode and used in another.
74 */
76 #define FETCH_TYPE_PREFETCH 1
77 #define FETCH_TYPE_DEMAND 2
78 #define FETCH_TYPE_WRITE 4
79 typedef enum {
80 ft_prefetch = FETCH_TYPE_PREFETCH,
81 ft_demand_read = FETCH_TYPE_DEMAND,
82 ft_demand_write = FETCH_TYPE_DEMAND | FETCH_TYPE_WRITE,
83 } fetch_type_t;
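/* Note for readers (illustrative, not from the original file): the fetch
 * types are bit flags, so ft_demand_write carries both the DEMAND and the
 * WRITE bit.  For example:
 *     fetch_type_t ft = ft_demand_write;
 *     (ft & FETCH_TYPE_DEMAND)   // true: a demand fault, so A/D bits may be set
 *     (ft & FETCH_TYPE_WRITE)    // true: a write, so the D bit will be needed
 *     (ft == ft_demand_read)     // false: demand reads lack FETCH_TYPE_WRITE
 */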
85 #ifdef DEBUG_TRACE_DUMP
86 static char *fetch_type_names[] = {
87 [ft_prefetch] = "prefetch",
88 [ft_demand_read] = "demand read",
89 [ft_demand_write] = "demand write",
90 };
91 #endif
93 /**************************************************************************/
94 /* Hash table mapping from guest pagetables to shadows
95 *
96 * Normal case: maps the mfn of a guest page to the mfn of its shadow page.
97 * FL1's: maps the *gfn* of the start of a superpage to the mfn of a
98 * shadow L1 which maps its "splinters".
99 */
101 static inline mfn_t
102 get_fl1_shadow_status(struct vcpu *v, gfn_t gfn)
103 /* Look for FL1 shadows in the hash table */
104 {
105 mfn_t smfn = shadow_hash_lookup(v, gfn_x(gfn), SH_type_fl1_shadow);
106 return smfn;
107 }
109 static inline mfn_t
110 get_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type)
111 /* Look for shadows in the hash table */
112 {
113 mfn_t smfn = shadow_hash_lookup(v, mfn_x(gmfn), shadow_type);
114 perfc_incr(shadow_get_shadow_status);
115 return smfn;
116 }
118 static inline void
119 set_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
120 /* Put an FL1 shadow into the hash table */
121 {
122 SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
123 gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn));
125 shadow_hash_insert(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
126 }
128 static inline void
129 set_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
130 /* Put a shadow into the hash table */
131 {
132 struct domain *d = v->domain;
133 int res;
135 SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, type=%08x, smfn=%05lx\n",
136 d->domain_id, v->vcpu_id, mfn_x(gmfn),
137 shadow_type, mfn_x(smfn));
139 /* 32-on-64 PV guests don't own their l4 pages so can't get_page them */
140 if ( !is_pv_32on64_vcpu(v) || shadow_type != SH_type_l4_64_shadow )
141 {
142 res = get_page(mfn_to_page(gmfn), d);
143 ASSERT(res == 1);
144 }
146 shadow_hash_insert(v, mfn_x(gmfn), shadow_type, smfn);
147 }
149 static inline void
150 delete_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
151 /* Remove a shadow from the hash table */
152 {
153 SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
154 gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn));
155 shadow_hash_delete(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
156 }
158 static inline void
159 delete_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
160 /* Remove a shadow from the hash table */
161 {
162 SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, type=%08x, smfn=%05lx\n",
163 v->domain->domain_id, v->vcpu_id,
164 mfn_x(gmfn), shadow_type, mfn_x(smfn));
165 shadow_hash_delete(v, mfn_x(gmfn), shadow_type, smfn);
166 /* 32-on-64 PV guests don't own their l4 pages; see set_shadow_status */
167 if ( !is_pv_32on64_vcpu(v) || shadow_type != SH_type_l4_64_shadow )
168 put_page(mfn_to_page(gmfn));
169 }
171 /**************************************************************************/
172 /* CPU feature support querying */
174 static inline int
175 guest_supports_superpages(struct vcpu *v)
176 {
177 /* The _PAGE_PSE bit must be honoured in HVM guests, whenever
178 * CR4.PSE is set or the guest is in PAE or long mode.
179 * It's also used in the dummy PT for vcpus with CR4.PG cleared. */
180 return (is_hvm_vcpu(v) &&
181 (GUEST_PAGING_LEVELS != 2
182 || !hvm_paging_enabled(v)
183 || (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PSE)));
184 }
186 static inline int
187 guest_supports_nx(struct vcpu *v)
188 {
189 if ( GUEST_PAGING_LEVELS == 2 || !cpu_has_nx )
190 return 0;
191 if ( !is_hvm_vcpu(v) )
192 return 1;
193 return hvm_nx_enabled(v);
194 }
197 /**************************************************************************/
198 /* Functions for walking the guest page tables */
201 /* Walk the guest pagetables, filling the walk_t with what we see.
202 * Takes an uninitialised walk_t. The caller must call unmap_walk()
203 * on the walk_t before discarding it or calling guest_walk_tables again.
204 * If "guest_op" is non-zero, we are serving a genuine guest memory access,
205 * and must (a) be under the shadow lock, and (b) remove write access
206 * from any guest PT pages we see, as we will be using their contents to
207 * perform shadow updates.
208 * Returns 0 for success or non-zero if the guest pagetables are malformed.
209 * N.B. Finding a not-present entry does not cause a non-zero return code. */
210 static inline int
211 guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op)
212 {
213 struct domain *d = v->domain;
214 p2m_type_t p2mt;
215 ASSERT(!guest_op || shadow_locked_by_me(d));
217 perfc_incr(shadow_guest_walk);
218 memset(gw, 0, sizeof(*gw));
219 gw->va = va;
221 #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
222 #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
223 /* Get l4e from the top level table */
224 gw->l4mfn = pagetable_get_mfn(v->arch.guest_table);
225 gw->l4e = (guest_l4e_t *)v->arch.paging.shadow.guest_vtable
226 + guest_l4_table_offset(va);
227 /* Walk down to the l3e */
228 if ( !(guest_l4e_get_flags(*gw->l4e) & _PAGE_PRESENT) ) return 0;
229 gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e), &p2mt);
230 if ( !p2m_is_ram(p2mt) ) return 1;
231 ASSERT(mfn_valid(gw->l3mfn));
232 /* This mfn is a pagetable: make sure the guest can't write to it. */
233 if ( guest_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
234 flush_tlb_mask(d->domain_dirty_cpumask);
235 gw->l3e = ((guest_l3e_t *)sh_map_domain_page(gw->l3mfn))
236 + guest_l3_table_offset(va);
237 #else /* PAE only... */
238 /* Get l3e from the cache of the guest's top level table */
239 gw->l3e = (guest_l3e_t *)&v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)];
240 #endif /* PAE or 64... */
241 /* Walk down to the l2e */
242 if ( !(guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT) ) return 0;
243 gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e), &p2mt);
244 if ( !p2m_is_ram(p2mt) ) return 1;
245 ASSERT(mfn_valid(gw->l2mfn));
246 /* This mfn is a pagetable: make sure the guest can't write to it. */
247 if ( guest_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
248 flush_tlb_mask(d->domain_dirty_cpumask);
249 gw->l2e = ((guest_l2e_t *)sh_map_domain_page(gw->l2mfn))
250 + guest_l2_table_offset(va);
251 #else /* 32-bit only... */
252 /* Get l2e from the top level table */
253 gw->l2mfn = pagetable_get_mfn(v->arch.guest_table);
254 gw->l2e = (guest_l2e_t *)v->arch.paging.shadow.guest_vtable
255 + guest_l2_table_offset(va);
256 #endif /* All levels... */
258 if ( !(guest_l2e_get_flags(*gw->l2e) & _PAGE_PRESENT) ) return 0;
259 if ( guest_supports_superpages(v) &&
260 (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE) )
261 {
262 /* Special case: this guest VA is in a PSE superpage, so there's
263 * no guest l1e. We make one up so that the propagation code
264 * can generate a shadow l1 table. Start with the gfn of the
265 * first 4k-page of the superpage. */
266 gfn_t start = guest_l2e_get_gfn(*gw->l2e);
267 /* Grant full access in the l1e, since all the guest entry's
268 * access controls are enforced in the shadow l2e. This lets
269 * us reflect l2 changes later without touching the l1s. */
270 int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
271 _PAGE_ACCESSED|_PAGE_DIRTY);
272 /* propagate PWT PCD to level 1 for PSE */
273 if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PWT) )
274 flags |= _PAGE_PWT;
275 if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PCD) )
276 flags |= _PAGE_PCD;
277 /* PSE level 2 entries use bit 12 for PAT; propagate it to bit 7
278 * of the level 1 */
279 if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE_PAT) )
280 flags |= _PAGE_PAT;
281 /* Increment the pfn by the right number of 4k pages.
282 * The ~0x1 is to mask out the PAT bit mentioned above. */
283 start = _gfn((gfn_x(start) & ~0x1) + guest_l1_table_offset(va));
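/* Worked example (illustrative, not from the original file): if the PSE
 * l2e's gfn field reads 0x801 because bit 12 of the entry is really PAT
 * (it shows up as bit 0 of the gfn), the & ~0x1 above recovers the
 * superpage's first gfn, 0x800; a va whose l1 offset is 0x155 then
 * gives start = gfn 0x955. */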
284 gw->eff_l1e = guest_l1e_from_gfn(start, flags);
285 gw->l1e = NULL;
286 gw->l1mfn = _mfn(INVALID_MFN);
287 }
288 else
289 {
290 /* Not a superpage: carry on and find the l1e. */
291 gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e), &p2mt);
292 if ( !p2m_is_ram(p2mt) ) return 1;
293 ASSERT(mfn_valid(gw->l1mfn));
294 /* This mfn is a pagetable: make sure the guest can't write to it. */
295 if ( guest_op
296 && sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
297 flush_tlb_mask(d->domain_dirty_cpumask);
298 gw->l1e = ((guest_l1e_t *)sh_map_domain_page(gw->l1mfn))
299 + guest_l1_table_offset(va);
300 gw->eff_l1e = *gw->l1e;
301 }
303 return 0;
304 }
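/* Illustrative calling pattern for guest_walk_tables() (editor's sketch,
 * not from the original file; it only restates the contract documented
 * above):
 *
 *     walk_t gw;
 *     shadow_lock(v->domain);                  // required when guest_op != 0
 *     if ( guest_walk_tables(v, va, &gw, 1) == 0 )
 *     {
 *         gfn_t gfn = guest_walk_to_gfn(&gw);  // INVALID_GFN if not present
 *         // ... use gw.eff_l1e, gw.l1e, gfn ...
 *     }
 *     unmap_walk(v, &gw);                      // always dispose of the walk
 *     shadow_unlock(v->domain);
 */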
306 /* Given a walk_t, translate the gw->va into the guest's notion of the
307 * corresponding frame number. */
308 static inline gfn_t
309 guest_walk_to_gfn(walk_t *gw)
310 {
311 if ( !(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT) )
312 return _gfn(INVALID_GFN);
313 return guest_l1e_get_gfn(gw->eff_l1e);
314 }
316 /* Given a walk_t, translate the gw->va into the guest's notion of the
317 * corresponding physical address. */
318 static inline paddr_t
319 guest_walk_to_gpa(walk_t *gw)
320 {
321 if ( !(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT) )
322 return 0;
323 return guest_l1e_get_paddr(gw->eff_l1e) + (gw->va & ~PAGE_MASK);
324 }
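/* Worked example (illustrative): with 4k pages the translation above just
 * splices the page offset back in.  If the effective l1e maps gfn 0x1234
 * and gw->va == 0xb8001abc, then guest_l1e_get_paddr() gives 0x1234000,
 * (va & ~PAGE_MASK) gives 0xabc, and the result is gpa 0x1234abc. */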
327 /* Unmap (and reinitialise) a guest walk.
328 * Call this to dispose of any walk filled in by guest_walk_tables() */
329 static void unmap_walk(struct vcpu *v, walk_t *gw)
330 {
331 #if GUEST_PAGING_LEVELS >= 3
332 #if GUEST_PAGING_LEVELS >= 4
333 if ( gw->l3e != NULL ) sh_unmap_domain_page(gw->l3e);
334 #endif
335 if ( gw->l2e != NULL ) sh_unmap_domain_page(gw->l2e);
336 #endif
337 if ( gw->l1e != NULL ) sh_unmap_domain_page(gw->l1e);
338 #ifdef DEBUG
339 memset(gw, 0, sizeof(*gw));
340 #endif
341 }
344 /* Pretty-print the contents of a guest-walk */
345 static inline void print_gw(walk_t *gw)
346 {
347 SHADOW_PRINTK("GUEST WALK TO %#lx:\n", gw->va);
348 #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
349 #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
350 SHADOW_PRINTK(" l4mfn=%" PRI_mfn "\n", mfn_x(gw->l4mfn));
351 SHADOW_PRINTK(" l4e=%p\n", gw->l4e);
352 if ( gw->l4e )
353 SHADOW_PRINTK(" *l4e=%" SH_PRI_gpte "\n", gw->l4e->l4);
354 SHADOW_PRINTK(" l3mfn=%" PRI_mfn "\n", mfn_x(gw->l3mfn));
355 #endif /* PAE or 64... */
356 SHADOW_PRINTK(" l3e=%p\n", gw->l3e);
357 if ( gw->l3e )
358 SHADOW_PRINTK(" *l3e=%" SH_PRI_gpte "\n", gw->l3e->l3);
359 #endif /* All levels... */
360 SHADOW_PRINTK(" l2mfn=%" PRI_mfn "\n", mfn_x(gw->l2mfn));
361 SHADOW_PRINTK(" l2e=%p\n", gw->l2e);
362 if ( gw->l2e )
363 SHADOW_PRINTK(" *l2e=%" SH_PRI_gpte "\n", gw->l2e->l2);
364 SHADOW_PRINTK(" l1mfn=%" PRI_mfn "\n", mfn_x(gw->l1mfn));
365 SHADOW_PRINTK(" l1e=%p\n", gw->l1e);
366 if ( gw->l1e )
367 SHADOW_PRINTK(" *l1e=%" SH_PRI_gpte "\n", gw->l1e->l1);
368 SHADOW_PRINTK(" eff_l1e=%" SH_PRI_gpte "\n", gw->eff_l1e.l1);
369 }
372 #if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES
373 /* Lightweight audit: pass all the shadows associated with this guest walk
374 * through the audit mechanisms */
375 static void sh_audit_gw(struct vcpu *v, walk_t *gw)
376 {
377 mfn_t smfn;
379 if ( !(SHADOW_AUDIT_ENABLE) )
380 return;
382 #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
383 if ( mfn_valid(gw->l4mfn)
384 && mfn_valid((smfn = get_shadow_status(v, gw->l4mfn,
385 SH_type_l4_shadow))) )
386 (void) sh_audit_l4_table(v, smfn, _mfn(INVALID_MFN));
387 if ( mfn_valid(gw->l3mfn)
388 && mfn_valid((smfn = get_shadow_status(v, gw->l3mfn,
389 SH_type_l3_shadow))) )
390 (void) sh_audit_l3_table(v, smfn, _mfn(INVALID_MFN));
391 #endif /* PAE or 64... */
392 if ( mfn_valid(gw->l2mfn) )
393 {
394 if ( mfn_valid((smfn = get_shadow_status(v, gw->l2mfn,
395 SH_type_l2_shadow))) )
396 (void) sh_audit_l2_table(v, smfn, _mfn(INVALID_MFN));
397 #if GUEST_PAGING_LEVELS == 3
398 if ( mfn_valid((smfn = get_shadow_status(v, gw->l2mfn,
399 SH_type_l2h_shadow))) )
400 (void) sh_audit_l2_table(v, smfn, _mfn(INVALID_MFN));
401 #endif
402 }
403 if ( mfn_valid(gw->l1mfn)
404 && mfn_valid((smfn = get_shadow_status(v, gw->l1mfn,
405 SH_type_l1_shadow))) )
406 (void) sh_audit_l1_table(v, smfn, _mfn(INVALID_MFN));
407 else if ( gw->l2e
408 && (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE)
409 && mfn_valid(
410 (smfn = get_fl1_shadow_status(v, guest_l2e_get_gfn(*gw->l2e)))) )
411 (void) sh_audit_fl1_table(v, smfn, _mfn(INVALID_MFN));
412 }
414 #else
415 #define sh_audit_gw(_v, _gw) do {} while(0)
416 #endif /* audit code */
420 /**************************************************************************/
421 /* Function to write to the guest tables, for propagating accessed and
422 * dirty bits from the shadow to the guest.
423 * Takes a guest mfn, a pointer to the guest entry, the level of pagetable,
424 * and an operation type. The guest entry is always passed as an l1e:
425 * since we only ever write flags, that's OK.
426 * Returns the new flag bits of the guest entry. */
428 static u32 guest_set_ad_bits(struct vcpu *v,
429 mfn_t gmfn,
430 guest_l1e_t *ep,
431 unsigned int level,
432 fetch_type_t ft)
433 {
434 u32 flags;
435 int res = 0;
437 ASSERT(ep && !(((unsigned long)ep) & ((sizeof *ep) - 1)));
438 ASSERT(level <= GUEST_PAGING_LEVELS);
439 ASSERT(shadow_locked_by_me(v->domain));
441 flags = guest_l1e_get_flags(*ep);
443 /* Only set A and D bits for guest-initiated accesses */
444 if ( !(ft & FETCH_TYPE_DEMAND) )
445 return flags;
447 ASSERT(mfn_valid(gmfn)
448 && (sh_mfn_is_a_page_table(gmfn)
449 || ((mfn_to_page(gmfn)->u.inuse.type_info & PGT_count_mask)
450 == 0)));
452 /* PAE l3s do not have A and D bits */
453 ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
455 /* Need the D bit as well for writes, in L1es and PSE L2es. */
456 if ( ft == ft_demand_write
457 && (level == 1 ||
458 (level == 2 && (flags & _PAGE_PSE) && guest_supports_superpages(v))) )
459 {
460 if ( (flags & (_PAGE_DIRTY | _PAGE_ACCESSED))
461 == (_PAGE_DIRTY | _PAGE_ACCESSED) )
462 return flags; /* Guest already has A and D bits set */
463 flags |= _PAGE_DIRTY | _PAGE_ACCESSED;
464 perfc_incr(shadow_ad_update);
465 }
466 else
467 {
468 if ( flags & _PAGE_ACCESSED )
469 return flags; /* Guest already has A bit set */
470 flags |= _PAGE_ACCESSED;
471 perfc_incr(shadow_a_update);
472 }
474 /* Set the bit(s) */
475 paging_mark_dirty(v->domain, mfn_x(gmfn));
476 SHADOW_DEBUG(A_AND_D, "gfn = %" SH_PRI_gfn ", "
477 "old flags = %#x, new flags = %#x\n",
478 gfn_x(guest_l1e_get_gfn(*ep)), guest_l1e_get_flags(*ep),
479 flags);
480 *ep = guest_l1e_from_gfn(guest_l1e_get_gfn(*ep), flags);
482 /* Propagate this change to any other shadows of the page
483 * (only necessary if there is more than one shadow) */
484 if ( mfn_to_page(gmfn)->count_info & PGC_page_table )
485 {
486 u32 shflags = mfn_to_page(gmfn)->shadow_flags & SHF_page_type_mask;
487 /* More than one type bit set in shadow-flags? */
488 if ( shflags & ~(1UL << find_first_set_bit(shflags)) )
489 res = sh_validate_guest_entry(v, gmfn, ep, sizeof (*ep));
490 }
492 /* We should never need to flush the TLB or recopy PAE entries */
493 ASSERT((res == 0) || (res == SHADOW_SET_CHANGED));
495 return flags;
496 }
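/* Summary of the effect above (illustrative, assuming a clean guest l1e):
 *   ft_prefetch     -> no bits change; the flags are returned unmodified.
 *   ft_demand_read  -> only _PAGE_ACCESSED is set.
 *   ft_demand_write -> _PAGE_ACCESSED and _PAGE_DIRTY are set (for l1es,
 *                      and for PSE l2es when the guest uses superpages;
 *                      other levels still get only the A bit). */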
498 #if (CONFIG_PAGING_LEVELS == GUEST_PAGING_LEVELS) && (CONFIG_PAGING_LEVELS == SHADOW_PAGING_LEVELS)
499 void *
500 sh_guest_map_l1e(struct vcpu *v, unsigned long addr,
501 unsigned long *gl1mfn)
502 {
503 void *pl1e = NULL;
504 walk_t gw;
506 ASSERT(shadow_mode_translate(v->domain));
508 // XXX -- this is expensive, but it's easy to cobble together...
509 // FIXME!
511 shadow_lock(v->domain);
512 guest_walk_tables(v, addr, &gw, 1);
514 if ( gw.l2e &&
515 (guest_l2e_get_flags(*gw.l2e) & _PAGE_PRESENT) &&
516 !(guest_supports_superpages(v) && (guest_l2e_get_flags(*gw.l2e) & _PAGE_PSE)) )
517 {
518 if ( gl1mfn )
519 *gl1mfn = mfn_x(gw.l1mfn);
520 pl1e = map_domain_page(mfn_x(gw.l1mfn)) +
521 (guest_l1_table_offset(addr) * sizeof(guest_l1e_t));
522 }
524 unmap_walk(v, &gw);
525 shadow_unlock(v->domain);
527 return pl1e;
528 }
530 void
531 sh_guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e)
532 {
533 walk_t gw;
535 ASSERT(shadow_mode_translate(v->domain));
537 // XXX -- this is expensive, but it's easy to cobble together...
538 // FIXME!
540 shadow_lock(v->domain);
541 guest_walk_tables(v, addr, &gw, 1);
542 *(guest_l1e_t *)eff_l1e = gw.eff_l1e;
543 unmap_walk(v, &gw);
544 shadow_unlock(v->domain);
545 }
546 #endif /* CONFIG==SHADOW==GUEST */
548 /**************************************************************************/
549 /* Functions to compute the correct index into a shadow page, given an
550 * index into the guest page (as returned by guest_get_index()).
551 * This is trivial when the shadow and guest use the same sized PTEs, but
552 * gets more interesting when those sizes are mismatched (e.g. 32-bit guest,
553 * PAE- or 64-bit shadows).
554 *
555 * These functions also increment the shadow mfn, when necessary. When PTE
556 * sizes are mismatched, it takes 2 shadow L1 pages for a single guest L1
557 * page. In this case, we allocate 2 contiguous pages for the shadow L1, and
558 * use simple pointer arithmetic on a pointer to the guest L1e to figure out
559 * which shadow page we really want. Similarly, when PTE sizes are
560 * mismatched, we shadow a guest L2 page with 4 shadow L2 pages. (The easiest
561 * way to see this is: a 32-bit guest L2 page maps 4GB of virtual address
562 * space, while a PAE- or 64-bit shadow L2 page maps 1GB of virtual address
563 * space.)
564 *
565 * For PAE guests, for every 32-bytes of guest L3 page table, we use 64-bytes
566 * of shadow (to store both the shadow, and the info that would normally be
567 * stored in page_info fields). This arrangement allows the shadow and the
568 * "page_info" fields to always be stored in the same page (in fact, in
569 * the same cache line), avoiding an extra call to map_domain_page().
570 */
572 static inline u32
573 guest_index(void *ptr)
574 {
575 return (u32)((unsigned long)ptr & ~PAGE_MASK) / sizeof(guest_l1e_t);
576 }
578 static u32
579 shadow_l1_index(mfn_t *smfn, u32 guest_index)
580 {
581 #if (GUEST_PAGING_LEVELS == 2) && (SHADOW_PAGING_LEVELS != 2)
582 *smfn = _mfn(mfn_x(*smfn) +
583 (guest_index / SHADOW_L1_PAGETABLE_ENTRIES));
584 return (guest_index % SHADOW_L1_PAGETABLE_ENTRIES);
585 #else
586 return guest_index;
587 #endif
588 }
590 static u32
591 shadow_l2_index(mfn_t *smfn, u32 guest_index)
592 {
593 #if (GUEST_PAGING_LEVELS == 2) && (SHADOW_PAGING_LEVELS != 2)
594 // Because we use 2 shadow l2 entries for each guest entry, the number of
595 // guest entries per shadow page is SHADOW_L2_PAGETABLE_ENTRIES/2
596 //
597 *smfn = _mfn(mfn_x(*smfn) +
598 (guest_index / (SHADOW_L2_PAGETABLE_ENTRIES / 2)));
600 // We multiply by two to get the index of the first of the two entries
601 // used to shadow the specified guest entry.
602 return (guest_index % (SHADOW_L2_PAGETABLE_ENTRIES / 2)) * 2;
603 #else
604 return guest_index;
605 #endif
606 }
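/* Worked example of the mismatched-size arithmetic (illustrative, not
 * from the original file): with a 32-bit guest on PAE or 64-bit shadows,
 * a guest l1 has 1024 entries but SHADOW_L1_PAGETABLE_ENTRIES is 512, so
 * guest index 700 lands in the second page of the two-page shadow l1 at
 * index 700 % 512 == 188.  For l2s each guest entry becomes a pair, so
 * guest index 700 lands in page 700 / 256 == 2 of the four-page shadow
 * l2, at entry (700 % 256) * 2 == 376. */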
608 #if GUEST_PAGING_LEVELS >= 4
610 static u32
611 shadow_l3_index(mfn_t *smfn, u32 guest_index)
612 {
613 return guest_index;
614 }
616 static u32
617 shadow_l4_index(mfn_t *smfn, u32 guest_index)
618 {
619 return guest_index;
620 }
622 #endif // GUEST_PAGING_LEVELS >= 4
624 extern u32 get_pat_flags(struct vcpu *v,
625 u32 gl1e_flags,
626 paddr_t gpaddr,
627 paddr_t spaddr);
629 unsigned char pat_type_2_pte_flags(unsigned char pat_type);
630 /**************************************************************************/
631 /* Function which computes shadow entries from their corresponding guest
632 * entries. This is the "heart" of the shadow code. It operates using
633 * level-1 shadow types, but handles all levels of entry.
634 * Don't call it directly, but use the four wrappers below.
635 */
637 static always_inline void
638 _sh_propagate(struct vcpu *v,
639 void *guest_entry_ptr,
640 mfn_t guest_table_mfn,
641 mfn_t target_mfn,
642 void *shadow_entry_ptr,
643 int level,
644 fetch_type_t ft,
645 p2m_type_t p2mt)
646 {
647 guest_l1e_t *gp = guest_entry_ptr;
648 shadow_l1e_t *sp = shadow_entry_ptr;
649 struct domain *d = v->domain;
650 u32 pass_thru_flags;
651 u32 gflags, sflags;
653 /* We don't shadow PAE l3s */
654 ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
656 /* Check there's something for the shadows to map to */
657 if ( !p2m_is_valid(p2mt) )
658 {
659 *sp = shadow_l1e_empty();
660 goto done;
661 }
663 if ( mfn_valid(guest_table_mfn) )
664 /* Handle A and D bit propagation into the guest */
665 gflags = guest_set_ad_bits(v, guest_table_mfn, gp, level, ft);
666 else
667 {
668 /* Must be an fl1e or a prefetch */
669 ASSERT(level==1 || !(ft & FETCH_TYPE_DEMAND));
670 gflags = guest_l1e_get_flags(*gp);
671 }
673 if ( unlikely(!(gflags & _PAGE_PRESENT)) )
674 {
675 /* If a guest l1 entry is not present, shadow with the magic
676 * guest-not-present entry. */
677 if ( level == 1 )
678 *sp = sh_l1e_gnp();
679 else
680 *sp = shadow_l1e_empty();
681 goto done;
682 }
684 if ( level == 1 && p2mt == p2m_mmio_dm )
685 {
686 /* Guest l1e maps emulated MMIO space */
687 *sp = sh_l1e_mmio(guest_l1e_get_gfn(*gp), gflags);
688 if ( !d->arch.paging.shadow.has_fast_mmio_entries )
689 d->arch.paging.shadow.has_fast_mmio_entries = 1;
690 goto done;
691 }
693 // Must have a valid target_mfn unless this is a prefetch. In the
694 // case of a prefetch, an invalid mfn means that we can not usefully
695 // shadow anything, and so we return early.
696 //
697 /* N.B. For pass-through MMIO, either this test needs to be relaxed,
698 * and shadow_set_l1e() trained to handle non-valid MFNs (ugh), or the
699 * MMIO areas need to be added to the frame-table to make them "valid". */
700 if ( shadow_mode_refcounts(d) &&
701 !mfn_valid(target_mfn) && (p2mt != p2m_mmio_direct) )
702 {
703 ASSERT((ft == ft_prefetch));
704 *sp = shadow_l1e_empty();
705 goto done;
706 }
708 // Propagate bits from the guest to the shadow.
709 // Some of these may be overwritten, below.
710 // Since we know the guest's PRESENT bit is set, we also set the shadow's
711 // SHADOW_PRESENT bit.
712 //
713 pass_thru_flags = (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER |
714 _PAGE_RW | _PAGE_PRESENT);
715 if ( guest_supports_nx(v) )
716 pass_thru_flags |= _PAGE_NX_BIT;
717 if ( !shadow_mode_refcounts(d) && !mfn_valid(target_mfn) )
718 pass_thru_flags |= _PAGE_PAT | _PAGE_PCD | _PAGE_PWT;
719 sflags = gflags & pass_thru_flags;
721 /* Only change memory caching type for pass-through domain */
722 if ( (level == 1) && is_hvm_domain(d) &&
723 !list_empty(&(domain_hvm_iommu(d)->pdev_list)) )
724 {
725 unsigned int type;
726 if ( hvm_get_mem_pinned_cacheattr(d, gfn_x(guest_l1e_get_gfn(*gp)),
727 &type) )
728 sflags |= pat_type_2_pte_flags(type);
729 else if ( v->domain->arch.hvm_domain.is_in_uc_mode )
730 sflags |= pat_type_2_pte_flags(PAT_TYPE_UNCACHABLE);
731 else
732 sflags |= get_pat_flags(v,
733 gflags,
734 guest_l1e_get_paddr(*gp),
735 mfn_x(target_mfn) << PAGE_SHIFT);
736 }
738 // Set the A&D bits for higher level shadows.
739 // Higher level entries do not, strictly speaking, have dirty bits, but
740 // since we use shadow linear tables, each of these entries may, at some
741 // point in time, also serve as a shadow L1 entry.
742 // By setting both the A&D bits in each of these, we eliminate the burden
743 // on the hardware to update these bits on initial accesses.
744 //
745 if ( (level > 1) && !((SHADOW_PAGING_LEVELS == 3) && (level == 3)) )
746 sflags |= _PAGE_ACCESSED | _PAGE_DIRTY;
748 // If the A or D bit has not yet been set in the guest, then we must
749 // prevent the corresponding kind of access.
750 //
751 if ( unlikely(!(gflags & _PAGE_ACCESSED)) )
752 sflags &= ~_PAGE_PRESENT;
754 /* D bits exist in L1es and PSE L2es */
755 if ( unlikely(((level == 1) ||
756 ((level == 2) &&
757 (gflags & _PAGE_PSE) &&
758 guest_supports_superpages(v)))
759 && !(gflags & _PAGE_DIRTY)) )
760 sflags &= ~_PAGE_RW;
762 // shadow_mode_log_dirty support
763 //
764 // Only allow the guest write access to a page a) on a demand fault,
765 // or b) if the page is already marked as dirty.
766 //
767 // (We handle log-dirty entirely inside the shadow code, without using the
768 // p2m_ram_logdirty p2m type: only HAP uses that.)
769 if ( unlikely((level == 1) && shadow_mode_log_dirty(d)) )
770 {
771 if ( mfn_valid(target_mfn) ) {
772 if ( ft & FETCH_TYPE_WRITE )
773 paging_mark_dirty(d, mfn_x(target_mfn));
774 else if ( !sh_mfn_is_dirty(d, target_mfn) )
775 sflags &= ~_PAGE_RW;
776 }
777 }
779 /* Read-only memory */
780 if ( p2mt == p2m_ram_ro )
781 sflags &= ~_PAGE_RW;
783 // protect guest page tables
784 //
785 if ( unlikely((level == 1) && sh_mfn_is_a_page_table(target_mfn)) )
786 {
787 if ( shadow_mode_trap_reads(d) )
788 {
789 // if we are trapping both reads & writes, then mark this page
790 // as not present...
791 //
792 sflags &= ~_PAGE_PRESENT;
793 }
794 else
795 {
796 // otherwise, just prevent any writes...
797 //
798 sflags &= ~_PAGE_RW;
799 }
800 }
802 // PV guests in 64-bit mode use two different page tables for user vs
803 // supervisor permissions, making the guest's _PAGE_USER bit irrelevant.
804 // It is always shadowed as present...
805 if ( (GUEST_PAGING_LEVELS == 4) && !is_pv_32on64_domain(d)
806 && !is_hvm_domain(d) )
807 {
808 sflags |= _PAGE_USER;
809 }
811 *sp = shadow_l1e_from_mfn(target_mfn, sflags);
813 done:
814 SHADOW_DEBUG(PROPAGATE,
815 "%s level %u guest %" SH_PRI_gpte " shadow %" SH_PRI_pte "\n",
816 fetch_type_names[ft], level, gp->l1, sp->l1);
817 }
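/* Illustrative trace of one common case (editor's note, not from the
 * original file): a demand-read fault on a present, writable guest l1e
 * whose dirty bit is still clear arrives here with gflags holding
 * _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED (A was just set by
 * guest_set_ad_bits() above).  The D-bit check then clears _PAGE_RW in
 * sflags, so the shadow entry is installed read-only; the guest's first
 * write faults again as a demand write, the D bit is set, and only then
 * is a writable shadow entry propagated. */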
820 /* These four wrappers give us a little bit of type-safety back around the
821 * use of void-* pointers in _sh_propagate(), and allow the compiler to
822 * optimize out some level checks. */
824 #if GUEST_PAGING_LEVELS >= 4
825 static void
826 l4e_propagate_from_guest(struct vcpu *v,
827 guest_l4e_t *gl4e,
828 mfn_t gl4mfn,
829 mfn_t sl3mfn,
830 shadow_l4e_t *sl4e,
831 fetch_type_t ft)
832 {
833 _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, p2m_ram_rw);
834 }
836 static void
837 l3e_propagate_from_guest(struct vcpu *v,
838 guest_l3e_t *gl3e,
839 mfn_t gl3mfn,
840 mfn_t sl2mfn,
841 shadow_l3e_t *sl3e,
842 fetch_type_t ft)
843 {
844 _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, p2m_ram_rw);
845 }
846 #endif // GUEST_PAGING_LEVELS >= 4
848 static void
849 l2e_propagate_from_guest(struct vcpu *v,
850 guest_l2e_t *gl2e,
851 mfn_t gl2mfn,
852 mfn_t sl1mfn,
853 shadow_l2e_t *sl2e,
854 fetch_type_t ft)
855 {
856 _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, p2m_ram_rw);
857 }
859 static void
860 l1e_propagate_from_guest(struct vcpu *v,
861 guest_l1e_t *gl1e,
862 mfn_t gl1mfn,
863 mfn_t gmfn,
864 shadow_l1e_t *sl1e,
865 fetch_type_t ft,
866 p2m_type_t p2mt)
867 {
868 _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, p2mt);
869 }
872 /**************************************************************************/
873 /* These functions update shadow entries (and do bookkeeping on the shadow
874 * tables they are in). It is intended that they are the only
875 * functions which ever write (non-zero) data onto a shadow page.
876 */
878 static inline void safe_write_entry(void *dst, void *src)
879 /* Copy one PTE safely when processors might be running on the
880 * destination pagetable. This does *not* give safety against
881 * concurrent writes (that's what the shadow lock is for), just
882 * stops the hardware picking up partially written entries. */
883 {
884 volatile unsigned long *d = dst;
885 unsigned long *s = src;
886 ASSERT(!((unsigned long) d & (sizeof (shadow_l1e_t) - 1)));
887 #if CONFIG_PAGING_LEVELS == 3
888 /* In PAE mode, pagetable entries are larger
889 * than machine words, so won't get written atomically. We need to make
890 * sure any other cpu running on these shadows doesn't see a
891 * half-written entry. Do this by marking the entry not-present first,
892 * then writing the high word before the low word. */
893 BUILD_BUG_ON(sizeof (shadow_l1e_t) != 2 * sizeof (unsigned long));
894 d[0] = 0;
895 d[1] = s[1];
896 d[0] = s[0];
897 #else
898 /* In 32-bit and 64-bit, sizeof(pte) == sizeof(ulong) == 1 word,
899 * which will be an atomic write, since the entry is aligned. */
900 BUILD_BUG_ON(sizeof (shadow_l1e_t) != sizeof (unsigned long));
901 *d = *s;
902 #endif
903 }
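/* Why the ordering above matters (illustrative): a PAE entry spans two
 * machine words.  Writing the low word and then the high word could
 * briefly expose "new low, old high" -- a present entry pointing at a
 * bogus frame.  Clearing d[0] (the word holding _PAGE_PRESENT) first
 * means every intermediate state the hardware can observe is simply
 * not-present. */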
906 static inline void
907 shadow_write_entries(void *d, void *s, int entries, mfn_t mfn)
908 /* This function does the actual writes to shadow pages.
909 * It must not be called directly, since it doesn't do the bookkeeping
910 * that shadow_set_l*e() functions do. */
911 {
912 shadow_l1e_t *dst = d;
913 shadow_l1e_t *src = s;
914 void *map = NULL;
915 int i;
917 /* Because we mirror access rights at all levels in the shadow, an
918 * l2 (or higher) entry with the RW bit cleared will leave us with
919 * no write access through the linear map.
920 * We detect that by writing to the shadow with copy_to_user() and
921 * using map_domain_page() to get a writeable mapping if we need to. */
922 if ( __copy_to_user(d, d, sizeof (unsigned long)) != 0 )
923 {
924 perfc_incr(shadow_linear_map_failed);
925 map = sh_map_domain_page(mfn);
926 ASSERT(map != NULL);
927 dst = map + ((unsigned long)dst & (PAGE_SIZE - 1));
928 }
931 for ( i = 0; i < entries; i++ )
932 safe_write_entry(dst++, src++);
934 if ( map != NULL ) sh_unmap_domain_page(map);
935 }
937 static inline int
938 perms_strictly_increased(u32 old_flags, u32 new_flags)
939 /* Given the flags of two entries, are the new flags a strict
940 * increase in rights over the old ones? */
941 {
942 u32 of = old_flags & (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX);
943 u32 nf = new_flags & (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX);
944 /* Flip the NX bit, since it's the only one that decreases rights;
945 * we calculate as if it were an "X" bit. */
946 of ^= _PAGE_NX_BIT;
947 nf ^= _PAGE_NX_BIT;
948 /* If the changed bits are all set in the new flags, then rights strictly
949 * increased between old and new. */
950 return ((of | (of ^ nf)) == nf);
951 }
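/* Worked example (illustrative): after the NX flip every bit means "more
 * rights".  With old = _PAGE_PRESENT|_PAGE_USER and new = the same plus
 * _PAGE_RW, of ^ nf == _PAGE_RW and (of | _PAGE_RW) == nf, so this
 * returns true and the shadow_set_l*e() callers need not force a TLB
 * flush.  Dropping RW, or setting NX where it was clear, leaves a
 * changed bit that is clear in nf, and the test returns false. */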
953 static int inline
954 shadow_get_page_from_l1e(shadow_l1e_t sl1e, struct domain *d)
955 {
956 int res;
957 mfn_t mfn;
958 struct domain *owner;
960 ASSERT(!sh_l1e_is_magic(sl1e));
962 if ( !shadow_mode_refcounts(d) )
963 return 1;
965 res = get_page_from_l1e(sl1e, d);
967 // If a privileged domain is attempting to install a map of a page it does
968 // not own, we let it succeed anyway.
969 //
970 if ( unlikely(!res) &&
971 IS_PRIV(d) &&
972 !shadow_mode_translate(d) &&
973 mfn_valid(mfn = shadow_l1e_get_mfn(sl1e)) &&
974 (owner = page_get_owner(mfn_to_page(mfn))) &&
975 (d != owner) )
976 {
977 res = get_page_from_l1e(sl1e, owner);
978 SHADOW_PRINTK("privileged domain %d installs map of mfn %05lx "
979 "which is owned by domain %d: %s\n",
980 d->domain_id, mfn_x(mfn), owner->domain_id,
981 res ? "success" : "failed");
982 }
984 if ( unlikely(!res) )
985 {
986 perfc_incr(shadow_get_page_fail);
987 SHADOW_PRINTK("failed: l1e=" SH_PRI_pte "\n");
988 }
990 return res;
991 }
993 static void inline
994 shadow_put_page_from_l1e(shadow_l1e_t sl1e, struct domain *d)
995 {
996 if ( !shadow_mode_refcounts(d) )
997 return;
999 put_page_from_l1e(sl1e, d);
1000 }
1002 #if GUEST_PAGING_LEVELS >= 4
1003 static int shadow_set_l4e(struct vcpu *v,
1004 shadow_l4e_t *sl4e,
1005 shadow_l4e_t new_sl4e,
1006 mfn_t sl4mfn)
1007 {
1008 int flags = 0, ok;
1009 shadow_l4e_t old_sl4e;
1010 paddr_t paddr;
1011 ASSERT(sl4e != NULL);
1012 old_sl4e = *sl4e;
1014 if ( old_sl4e.l4 == new_sl4e.l4 ) return 0; /* Nothing to do */
1016 paddr = ((((paddr_t)mfn_x(sl4mfn)) << PAGE_SHIFT)
1017 | (((unsigned long)sl4e) & ~PAGE_MASK));
1019 if ( shadow_l4e_get_flags(new_sl4e) & _PAGE_PRESENT )
1020 {
1021 /* About to install a new reference */
1022 mfn_t sl3mfn = shadow_l4e_get_mfn(new_sl4e);
1023 ok = sh_get_ref(v, sl3mfn, paddr);
1024 /* Are we pinning l3 shadows to handle weird linux behaviour? */
1025 if ( sh_type_is_pinnable(v, SH_type_l3_64_shadow) )
1026 ok |= sh_pin(v, sl3mfn);
1027 if ( !ok )
1028 {
1029 domain_crash(v->domain);
1030 return SHADOW_SET_ERROR;
1031 }
1032 }
1034 /* Write the new entry */
1035 shadow_write_entries(sl4e, &new_sl4e, 1, sl4mfn);
1036 flags |= SHADOW_SET_CHANGED;
1038 if ( shadow_l4e_get_flags(old_sl4e) & _PAGE_PRESENT )
1039 {
1040 /* We lost a reference to an old mfn. */
1041 mfn_t osl3mfn = shadow_l4e_get_mfn(old_sl4e);
1042 if ( (mfn_x(osl3mfn) != mfn_x(shadow_l4e_get_mfn(new_sl4e)))
1043 || !perms_strictly_increased(shadow_l4e_get_flags(old_sl4e),
1044 shadow_l4e_get_flags(new_sl4e)) )
1045 {
1046 flags |= SHADOW_SET_FLUSH;
1047 }
1048 sh_put_ref(v, osl3mfn, paddr);
1049 }
1050 return flags;
1051 }
1053 static int shadow_set_l3e(struct vcpu *v,
1054 shadow_l3e_t *sl3e,
1055 shadow_l3e_t new_sl3e,
1056 mfn_t sl3mfn)
1058 int flags = 0;
1059 shadow_l3e_t old_sl3e;
1060 paddr_t paddr;
1061 ASSERT(sl3e != NULL);
1062 old_sl3e = *sl3e;
1064 if ( old_sl3e.l3 == new_sl3e.l3 ) return 0; /* Nothing to do */
1066 paddr = ((((paddr_t)mfn_x(sl3mfn)) << PAGE_SHIFT)
1067 | (((unsigned long)sl3e) & ~PAGE_MASK));
1069 if ( shadow_l3e_get_flags(new_sl3e) & _PAGE_PRESENT )
1070 /* About to install a new reference */
1071 if ( !sh_get_ref(v, shadow_l3e_get_mfn(new_sl3e), paddr) )
1073 domain_crash(v->domain);
1074 return SHADOW_SET_ERROR;
1077 /* Write the new entry */
1078 shadow_write_entries(sl3e, &new_sl3e, 1, sl3mfn);
1079 flags |= SHADOW_SET_CHANGED;
1081 if ( shadow_l3e_get_flags(old_sl3e) & _PAGE_PRESENT )
1083 /* We lost a reference to an old mfn. */
1084 mfn_t osl2mfn = shadow_l3e_get_mfn(old_sl3e);
1085 if ( (mfn_x(osl2mfn) != mfn_x(shadow_l3e_get_mfn(new_sl3e))) ||
1086 !perms_strictly_increased(shadow_l3e_get_flags(old_sl3e),
1087 shadow_l3e_get_flags(new_sl3e)) )
1089 flags |= SHADOW_SET_FLUSH;
1091 sh_put_ref(v, osl2mfn, paddr);
1093 return flags;
1095 #endif /* GUEST_PAGING_LEVELS >= 4 */
1097 static int shadow_set_l2e(struct vcpu *v,
1098 shadow_l2e_t *sl2e,
1099 shadow_l2e_t new_sl2e,
1100 mfn_t sl2mfn)
1102 int flags = 0;
1103 shadow_l2e_t old_sl2e;
1104 paddr_t paddr;
1106 #if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
1107 /* In 2-on-3 we work with pairs of l2es pointing at two-page
1108 * shadows. Reference counting and up-pointers track from the first
1109 * page of the shadow to the first l2e, so make sure that we're
1110 * working with those:
1111 * Align the pointer down so it's pointing at the first of the pair */
1112 sl2e = (shadow_l2e_t *)((unsigned long)sl2e & ~(sizeof(shadow_l2e_t)));
1113 /* Align the mfn of the shadow entry too */
1114 new_sl2e.l2 &= ~(1<<PAGE_SHIFT);
1115 #endif
1117 ASSERT(sl2e != NULL);
1118 old_sl2e = *sl2e;
1120 if ( old_sl2e.l2 == new_sl2e.l2 ) return 0; /* Nothing to do */
1122 paddr = ((((paddr_t)mfn_x(sl2mfn)) << PAGE_SHIFT)
1123 | (((unsigned long)sl2e) & ~PAGE_MASK));
1125 if ( shadow_l2e_get_flags(new_sl2e) & _PAGE_PRESENT )
1126 /* About to install a new reference */
1127 if ( !sh_get_ref(v, shadow_l2e_get_mfn(new_sl2e), paddr) )
1129 domain_crash(v->domain);
1130 return SHADOW_SET_ERROR;
1133 /* Write the new entry */
1134 #if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
1136 shadow_l2e_t pair[2] = { new_sl2e, new_sl2e };
1137 /* The l1 shadow is two pages long and needs to be pointed to by
1138 * two adjacent l2es. The pair have the same flags, but point
1139 * at odd and even MFNs */
1140 ASSERT(!(pair[0].l2 & (1<<PAGE_SHIFT)));
1141 pair[1].l2 |= (1<<PAGE_SHIFT);
1142 shadow_write_entries(sl2e, &pair, 2, sl2mfn);
1144 #else /* normal case */
1145 shadow_write_entries(sl2e, &new_sl2e, 1, sl2mfn);
1146 #endif
1147 flags |= SHADOW_SET_CHANGED;
1149 if ( shadow_l2e_get_flags(old_sl2e) & _PAGE_PRESENT )
1151 /* We lost a reference to an old mfn. */
1152 mfn_t osl1mfn = shadow_l2e_get_mfn(old_sl2e);
1153 if ( (mfn_x(osl1mfn) != mfn_x(shadow_l2e_get_mfn(new_sl2e))) ||
1154 !perms_strictly_increased(shadow_l2e_get_flags(old_sl2e),
1155 shadow_l2e_get_flags(new_sl2e)) )
1157 flags |= SHADOW_SET_FLUSH;
1159 sh_put_ref(v, osl1mfn, paddr);
1161 return flags;
1164 static int shadow_set_l1e(struct vcpu *v,
1165 shadow_l1e_t *sl1e,
1166 shadow_l1e_t new_sl1e,
1167 mfn_t sl1mfn)
1169 int flags = 0;
1170 struct domain *d = v->domain;
1171 shadow_l1e_t old_sl1e;
1172 ASSERT(sl1e != NULL);
1174 old_sl1e = *sl1e;
1176 if ( old_sl1e.l1 == new_sl1e.l1 ) return 0; /* Nothing to do */
1178 if ( (shadow_l1e_get_flags(new_sl1e) & _PAGE_PRESENT)
1179 && !sh_l1e_is_magic(new_sl1e) )
1181 /* About to install a new reference */
1182 if ( shadow_mode_refcounts(d) ) {
1183 if ( shadow_get_page_from_l1e(new_sl1e, d) == 0 )
1185 /* Doesn't look like a pagetable. */
1186 flags |= SHADOW_SET_ERROR;
1187 new_sl1e = shadow_l1e_empty();
1192 /* Write the new entry */
1193 shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
1194 flags |= SHADOW_SET_CHANGED;
1196 if ( (shadow_l1e_get_flags(old_sl1e) & _PAGE_PRESENT)
1197 && !sh_l1e_is_magic(old_sl1e) )
1199 /* We lost a reference to an old mfn. */
1200 /* N.B. Unlike higher-level sets, never need an extra flush
1201 * when writing an l1e. Because it points to the same guest frame
1202 * as the guest l1e did, it's the guest's responsibility to
1203 * trigger a flush later. */
1204 if ( shadow_mode_refcounts(d) )
1206 shadow_put_page_from_l1e(old_sl1e, d);
1209 return flags;
1213 /**************************************************************************/
1214 /* Macros to walk pagetables. These take the shadow of a pagetable and
1215 * walk every "interesting" entry. That is, they don't touch Xen mappings,
1216 * and for 32-bit l2s shadowed onto PAE or 64-bit, they only touch every
1217 * second entry (since pairs of entries are managed together). For multi-page
1218 * shadows they walk all pages.
1220 * Arguments are an MFN, the variable to point to each entry, a variable
1221 * to indicate that we are done (we will shortcut to the end of the scan
1222 * when _done != 0), a variable to indicate that we should avoid Xen mappings,
1223 * and the code.
1225 * WARNING: These macros have side-effects. They change the values of both
1226 * the pointer and the MFN. */
1228 static inline void increment_ptr_to_guest_entry(void *ptr)
1229 {
1230 if ( ptr )
1231 {
1232 guest_l1e_t **entry = ptr;
1233 (*entry)++;
1234 }
1235 }
1237 /* All kinds of l1: touch all entries */
1238 #define _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code) \
1239 do { \
1240 int _i; \
1241 shadow_l1e_t *_sp = map_shadow_page((_sl1mfn)); \
1242 ASSERT(mfn_to_shadow_page(_sl1mfn)->type == SH_type_l1_shadow \
1243 || mfn_to_shadow_page(_sl1mfn)->type == SH_type_fl1_shadow); \
1244 for ( _i = 0; _i < SHADOW_L1_PAGETABLE_ENTRIES; _i++ ) \
1245 { \
1246 (_sl1e) = _sp + _i; \
1247 if ( shadow_l1e_get_flags(*(_sl1e)) & _PAGE_PRESENT ) \
1248 {_code} \
1249 if ( _done ) break; \
1250 increment_ptr_to_guest_entry(_gl1p); \
1251 } \
1252 unmap_shadow_page(_sp); \
1253 } while (0)
1255 /* 32-bit l1, on PAE or 64-bit shadows: need to walk both pages of shadow */
1256 #if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
1257 #define SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code) \
1258 do { \
1259 int __done = 0; \
1260 _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, \
1261 ({ (__done = _done); }), _code); \
1262 _sl1mfn = _mfn(mfn_x(_sl1mfn) + 1); \
1263 if ( !__done ) \
1264 _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, \
1265 ({ (__done = _done); }), _code); \
1266 } while (0)
1267 #else /* Everything else; l1 shadows are only one page */
1268 #define SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code) \
1269 _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code)
1270 #endif
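/* Illustrative use of the iterator (editor's sketch, not from the original
 * file): the body and the "done" condition are passed in as macro
 * arguments, e.g. to blank every present entry of an l1 shadow:
 *
 *     shadow_l1e_t *sl1e;
 *     SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0,
 *     {
 *         (void) shadow_set_l1e(v, sl1e, shadow_l1e_empty(), sl1mfn);
 *     });
 *
 * Remember the warning above: the macro advances both the shadow mfn and
 * the optional guest-entry pointer as side effects. */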
1273 #if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
1275 /* 32-bit l2 on PAE/64: four pages, touch every second entry, and avoid Xen */
1276 #define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _dom, _code) \
1277 do { \
1278 int _i, _j, __done = 0; \
1279 int _xen = !shadow_mode_external(_dom); \
1280 ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_32_shadow); \
1281 for ( _j = 0; _j < 4 && !__done; _j++ ) \
1282 { \
1283 shadow_l2e_t *_sp = map_shadow_page(_sl2mfn); \
1284 for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i += 2 ) \
1285 if ( (!(_xen)) \
1286 || ((_j * SHADOW_L2_PAGETABLE_ENTRIES) + _i) \
1287 < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT) ) \
1288 { \
1289 (_sl2e) = _sp + _i; \
1290 if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT ) \
1291 {_code} \
1292 if ( (__done = (_done)) ) break; \
1293 increment_ptr_to_guest_entry(_gl2p); \
1294 } \
1295 unmap_shadow_page(_sp); \
1296 _sl2mfn = _mfn(mfn_x(_sl2mfn) + 1); \
1297 } \
1298 } while (0)
1300 #elif GUEST_PAGING_LEVELS == 2
1302 /* 32-bit on 32-bit: avoid Xen entries */
1303 #define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _dom, _code) \
1304 do { \
1305 int _i; \
1306 int _xen = !shadow_mode_external(_dom); \
1307 shadow_l2e_t *_sp = map_shadow_page((_sl2mfn)); \
1308 ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_32_shadow); \
1309 for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ ) \
1310 if ( (!(_xen)) \
1311 || \
1312 (_i < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT)) ) \
1313 { \
1314 (_sl2e) = _sp + _i; \
1315 if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT ) \
1316 {_code} \
1317 if ( _done ) break; \
1318 increment_ptr_to_guest_entry(_gl2p); \
1319 } \
1320 unmap_shadow_page(_sp); \
1321 } while (0)
1323 #elif GUEST_PAGING_LEVELS == 3
1325 /* PAE: if it's an l2h, don't touch Xen mappings */
1326 #define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _dom, _code) \
1327 do { \
1328 int _i; \
1329 int _xen = !shadow_mode_external(_dom); \
1330 shadow_l2e_t *_sp = map_shadow_page((_sl2mfn)); \
1331 ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_pae_shadow \
1332 || mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2h_pae_shadow);\
1333 for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ ) \
1334 if ( (!(_xen)) \
1335 || mfn_to_shadow_page(_sl2mfn)->type != SH_type_l2h_pae_shadow\
1336 || ((_i + (3 * SHADOW_L2_PAGETABLE_ENTRIES)) \
1337 < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT)) ) \
1338 { \
1339 (_sl2e) = _sp + _i; \
1340 if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT ) \
1341 {_code} \
1342 if ( _done ) break; \
1343 increment_ptr_to_guest_entry(_gl2p); \
1344 } \
1345 unmap_shadow_page(_sp); \
1346 } while (0)
1348 #else
1350 /* 64-bit l2: touch all entries except for PAE compat guests. */
1351 #define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _dom, _code) \
1352 do { \
1353 int _i; \
1354 int _xen = !shadow_mode_external(_dom); \
1355 shadow_l2e_t *_sp = map_shadow_page((_sl2mfn)); \
1356 ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_64_shadow || \
1357 mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2h_64_shadow); \
1358 for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ ) \
1359 { \
1360 if ( (!(_xen)) \
1361 || !is_pv_32on64_domain(_dom) \
1362 || mfn_to_shadow_page(_sl2mfn)->type != SH_type_l2h_64_shadow \
1363 || (_i < COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(_dom)) ) \
1364 { \
1365 (_sl2e) = _sp + _i; \
1366 if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT ) \
1367 {_code} \
1368 if ( _done ) break; \
1369 increment_ptr_to_guest_entry(_gl2p); \
1370 } \
1371 } \
1372 unmap_shadow_page(_sp); \
1373 } while (0)
1375 #endif /* different kinds of l2 */
1377 #if GUEST_PAGING_LEVELS == 4
1379 /* 64-bit l3: touch all entries */
1380 #define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code) \
1381 do { \
1382 int _i; \
1383 shadow_l3e_t *_sp = map_shadow_page((_sl3mfn)); \
1384 ASSERT(mfn_to_shadow_page(_sl3mfn)->type == SH_type_l3_64_shadow); \
1385 for ( _i = 0; _i < SHADOW_L3_PAGETABLE_ENTRIES; _i++ ) \
1386 { \
1387 (_sl3e) = _sp + _i; \
1388 if ( shadow_l3e_get_flags(*(_sl3e)) & _PAGE_PRESENT ) \
1389 {_code} \
1390 if ( _done ) break; \
1391 increment_ptr_to_guest_entry(_gl3p); \
1392 } \
1393 unmap_shadow_page(_sp); \
1394 } while (0)
1396 /* 64-bit l4: avoid Xen mappings */
1397 #define SHADOW_FOREACH_L4E(_sl4mfn, _sl4e, _gl4p, _done, _dom, _code) \
1398 do { \
1399 shadow_l4e_t *_sp = map_shadow_page((_sl4mfn)); \
1400 int _xen = !shadow_mode_external(_dom); \
1401 int _i; \
1402 ASSERT(mfn_to_shadow_page(_sl4mfn)->type == SH_type_l4_64_shadow); \
1403 for ( _i = 0; _i < SHADOW_L4_PAGETABLE_ENTRIES; _i++ ) \
1404 { \
1405 if ( (!(_xen)) || is_guest_l4_slot(_dom, _i) ) \
1406 { \
1407 (_sl4e) = _sp + _i; \
1408 if ( shadow_l4e_get_flags(*(_sl4e)) & _PAGE_PRESENT ) \
1409 {_code} \
1410 if ( _done ) break; \
1411 } \
1412 increment_ptr_to_guest_entry(_gl4p); \
1413 } \
1414 unmap_shadow_page(_sp); \
1415 } while (0)
1417 #endif
1421 /**************************************************************************/
1422 /* Functions to install Xen mappings and linear mappings in shadow pages */
1424 // XXX -- this function should probably be moved to shadow-common.c, but that
1425 // probably wants to wait until the shadow types have been moved from
1426 // shadow-types.h to shadow-private.h
1427 //
1428 #if CONFIG_PAGING_LEVELS == 4 && GUEST_PAGING_LEVELS == 4
1429 void sh_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn)
1431 struct domain *d = v->domain;
1432 shadow_l4e_t *sl4e;
1434 sl4e = sh_map_domain_page(sl4mfn);
1435 ASSERT(sl4e != NULL);
1436 ASSERT(sizeof (l4_pgentry_t) == sizeof (shadow_l4e_t));
1438 /* Copy the common Xen mappings from the idle domain */
1439 memcpy(&sl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
1440 &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
1441 ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
1443 /* Install the per-domain mappings for this domain */
1444 sl4e[shadow_l4_table_offset(PERDOMAIN_VIRT_START)] =
1445 shadow_l4e_from_mfn(page_to_mfn(virt_to_page(d->arch.mm_perdomain_l3)),
1446 __PAGE_HYPERVISOR);
1448 /* Linear mapping */
1449 sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
1450 shadow_l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR);
1452 if ( shadow_mode_translate(v->domain) && !shadow_mode_external(v->domain) )
1454 // linear tables may not be used with translated PV guests
1455 sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
1456 shadow_l4e_empty();
1458 else
1460 sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
1461 shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR);
1464 if ( shadow_mode_translate(v->domain) )
1466 /* install domain-specific P2M table */
1467 sl4e[shadow_l4_table_offset(RO_MPT_VIRT_START)] =
1468 shadow_l4e_from_mfn(pagetable_get_mfn(d->arch.phys_table),
1469 __PAGE_HYPERVISOR);
1472 if ( is_pv_32on64_domain(v->domain) )
1474 /* install compat arg xlat entry */
1475 sl4e[shadow_l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
1476 shadow_l4e_from_mfn(
1477 page_to_mfn(virt_to_page(d->arch.mm_arg_xlat_l3)),
1478 __PAGE_HYPERVISOR);
1481 sh_unmap_domain_page(sl4e);
1483 #endif
1485 #if CONFIG_PAGING_LEVELS >= 3 && GUEST_PAGING_LEVELS >= 3
1486 // For 3-on-3 PV guests, we need to make sure the xen mappings are in
1487 // place, which means that we need to populate the l2h entry in the l3
1488 // table.
1490 static void sh_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn)
1492 struct domain *d = v->domain;
1493 shadow_l2e_t *sl2e;
1494 #if CONFIG_PAGING_LEVELS == 3
1495 int i;
1496 #else
1498 if ( !is_pv_32on64_vcpu(v) )
1499 return;
1500 #endif
1502 sl2e = sh_map_domain_page(sl2hmfn);
1503 ASSERT(sl2e != NULL);
1504 ASSERT(sizeof (l2_pgentry_t) == sizeof (shadow_l2e_t));
1506 #if CONFIG_PAGING_LEVELS == 3
1508 /* Copy the common Xen mappings from the idle domain */
1509 memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
1510 &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
1511 L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
1513 /* Install the per-domain mappings for this domain */
1514 for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
1515 sl2e[shadow_l2_table_offset(PERDOMAIN_VIRT_START) + i] =
1516 shadow_l2e_from_mfn(
1517 page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i),
1518 __PAGE_HYPERVISOR);
1520 /* We don't set up a linear mapping here because we can't until this
1521 * l2h is installed in an l3e. sh_update_linear_entries() handles
1522 * the linear mappings when CR3 (and so the fourth l3e) is loaded.
1523 * We zero them here, just as a safety measure.
1524 */
1525 for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
1526 sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START) + i] =
1527 shadow_l2e_empty();
1528 for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
1529 sl2e[shadow_l2_table_offset(SH_LINEAR_PT_VIRT_START) + i] =
1530 shadow_l2e_empty();
1532 if ( shadow_mode_translate(d) )
1534 /* Install the domain-specific p2m table */
1535 l3_pgentry_t *p2m;
1536 ASSERT(pagetable_get_pfn(d->arch.phys_table) != 0);
1537 p2m = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
1538 for ( i = 0; i < MACHPHYS_MBYTES>>1; i++ )
1540 sl2e[shadow_l2_table_offset(RO_MPT_VIRT_START) + i] =
1541 (l3e_get_flags(p2m[i]) & _PAGE_PRESENT)
1542 ? shadow_l2e_from_mfn(_mfn(l3e_get_pfn(p2m[i])),
1543 __PAGE_HYPERVISOR)
1544 : shadow_l2e_empty();
1546 sh_unmap_domain_page(p2m);
1549 #else
1551 /* Copy the common Xen mappings from the idle domain */
1552 memcpy(
1553 &sl2e[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)],
1554 &compat_idle_pg_table_l2[l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
1555 COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*sl2e));
1557 #endif
1559 sh_unmap_domain_page(sl2e);
1561 #endif
1564 #if CONFIG_PAGING_LEVELS == 2 && GUEST_PAGING_LEVELS == 2
1565 void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn)
1567 struct domain *d = v->domain;
1568 shadow_l2e_t *sl2e;
1569 int i;
1571 sl2e = sh_map_domain_page(sl2mfn);
1572 ASSERT(sl2e != NULL);
1573 ASSERT(sizeof (l2_pgentry_t) == sizeof (shadow_l2e_t));
1575 /* Copy the common Xen mappings from the idle domain */
1576 memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT],
1577 &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT],
1578 L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
1580 /* Install the per-domain mappings for this domain */
1581 for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
1582 sl2e[shadow_l2_table_offset(PERDOMAIN_VIRT_START) + i] =
1583 shadow_l2e_from_mfn(
1584 page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i),
1585 __PAGE_HYPERVISOR);
1587 /* Linear mapping */
1588 sl2e[shadow_l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
1589 shadow_l2e_from_mfn(sl2mfn, __PAGE_HYPERVISOR);
1591 if ( shadow_mode_translate(v->domain) && !shadow_mode_external(v->domain) )
1593 // linear tables may not be used with translated PV guests
1594 sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START)] =
1595 shadow_l2e_empty();
1597 else
1599 sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START)] =
1600 shadow_l2e_from_mfn(gl2mfn, __PAGE_HYPERVISOR);
1603 if ( shadow_mode_translate(d) )
1605 /* install domain-specific P2M table */
1606 sl2e[shadow_l2_table_offset(RO_MPT_VIRT_START)] =
1607 shadow_l2e_from_mfn(pagetable_get_mfn(d->arch.phys_table),
1608 __PAGE_HYPERVISOR);
1611 sh_unmap_domain_page(sl2e);
1613 #endif
1617 /**************************************************************************/
1618 /* Create a shadow of a given guest page.
1619 */
1620 static mfn_t
1621 sh_make_shadow(struct vcpu *v, mfn_t gmfn, u32 shadow_type)
1623 mfn_t smfn = shadow_alloc(v->domain, shadow_type, mfn_x(gmfn));
1624 SHADOW_DEBUG(MAKE_SHADOW, "(%05lx, %u)=>%05lx\n",
1625 mfn_x(gmfn), shadow_type, mfn_x(smfn));
1627 if ( shadow_type != SH_type_l2_32_shadow
1628 && shadow_type != SH_type_l2_pae_shadow
1629 && shadow_type != SH_type_l2h_pae_shadow
1630 && shadow_type != SH_type_l4_64_shadow )
1631 /* Lower-level shadow, not yet linked from a higher level */
1632 mfn_to_shadow_page(smfn)->up = 0;
1634 #if GUEST_PAGING_LEVELS == 4
1635 #if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL)
1636 if ( shadow_type == SH_type_l4_64_shadow &&
1637 unlikely(v->domain->arch.paging.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL) )
1639 /* We're shadowing a new l4, but we've been assuming the guest uses
1640 * only one l4 per vcpu and context switches using an l4 entry.
1641 * Count the number of active l4 shadows. If there are enough
1642 * of them, decide that this isn't an old linux guest, and stop
1643 * pinning l3es. This is not very quick but it doesn't happen
1644 * very often. */
1645 struct list_head *l, *t;
1646 struct shadow_page_info *sp;
1647 struct vcpu *v2;
1648 int l4count = 0, vcpus = 0;
1649 list_for_each(l, &v->domain->arch.paging.shadow.pinned_shadows)
1651 sp = list_entry(l, struct shadow_page_info, list);
1652 if ( sp->type == SH_type_l4_64_shadow )
1653 l4count++;
1655 for_each_vcpu ( v->domain, v2 )
1656 vcpus++;
1657 if ( l4count > 2 * vcpus )
1659 /* Unpin all the pinned l3 tables, and don't pin any more. */
1660 list_for_each_safe(l, t, &v->domain->arch.paging.shadow.pinned_shadows)
1662 sp = list_entry(l, struct shadow_page_info, list);
1663 if ( sp->type == SH_type_l3_64_shadow )
1664 sh_unpin(v, shadow_page_to_mfn(sp));
1666 v->domain->arch.paging.shadow.opt_flags &= ~SHOPT_LINUX_L3_TOPLEVEL;
1669 #endif
1670 #endif
1672 // Create the Xen mappings...
1673 if ( !shadow_mode_external(v->domain) )
1675 switch (shadow_type)
1677 #if CONFIG_PAGING_LEVELS == 4 && GUEST_PAGING_LEVELS == 4
1678 case SH_type_l4_shadow:
1679 sh_install_xen_entries_in_l4(v, gmfn, smfn); break;
1680 #endif
1681 #if CONFIG_PAGING_LEVELS >= 3 && GUEST_PAGING_LEVELS >= 3
1682 case SH_type_l2h_shadow:
1683 sh_install_xen_entries_in_l2h(v, smfn); break;
1684 #endif
1685 #if CONFIG_PAGING_LEVELS == 2 && GUEST_PAGING_LEVELS == 2
1686 case SH_type_l2_shadow:
1687 sh_install_xen_entries_in_l2(v, gmfn, smfn); break;
1688 #endif
1689 default: /* Do nothing */ break;
1693 shadow_promote(v, gmfn, shadow_type);
1694 set_shadow_status(v, gmfn, shadow_type, smfn);
1696 return smfn;
1699 /* Make a splintered superpage shadow */
1700 static mfn_t
1701 make_fl1_shadow(struct vcpu *v, gfn_t gfn)
1703 mfn_t smfn = shadow_alloc(v->domain, SH_type_fl1_shadow,
1704 (unsigned long) gfn_x(gfn));
1706 SHADOW_DEBUG(MAKE_SHADOW, "(%" SH_PRI_gfn ")=>%" PRI_mfn "\n",
1707 gfn_x(gfn), mfn_x(smfn));
1709 set_fl1_shadow_status(v, gfn, smfn);
1710 return smfn;
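/* Illustrative example: a PAE/64-bit guest superpage at gfn G is shadowed by
 * an fl1 whose 512 entries map G, G+1, ..., G+511 (1024 entries for a 4MB
 * superpage of a 2-level guest); the entries are filled on demand by
 * offsetting the superpage's gfn, cf. sh_prefetch() below. */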
1714 #if SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS
1715 mfn_t
1716 sh_make_monitor_table(struct vcpu *v)
1718 struct domain *d = v->domain;
1720 ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0);
1722 /* Guarantee we can get the memory we need */
1723 shadow_prealloc(d, SH_type_monitor_table, CONFIG_PAGING_LEVELS - 1);
1725 #if CONFIG_PAGING_LEVELS == 4
1727 mfn_t m4mfn;
1728 m4mfn = shadow_alloc(d, SH_type_monitor_table, 0);
1729 sh_install_xen_entries_in_l4(v, m4mfn, m4mfn);
1730 /* Remember the level of this table */
1731 mfn_to_page(m4mfn)->shadow_flags = 4;
1732 #if SHADOW_PAGING_LEVELS < 4
1733 // Install a monitor l3 table in slot 0 of the l4 table.
1734 // This is used for shadow linear maps.
1736 mfn_t m3mfn;
1737 l4_pgentry_t *l4e;
1738 m3mfn = shadow_alloc(d, SH_type_monitor_table, 0);
1739 mfn_to_page(m3mfn)->shadow_flags = 3;
1740 l4e = sh_map_domain_page(m4mfn);
1741 l4e[0] = l4e_from_pfn(mfn_x(m3mfn), __PAGE_HYPERVISOR);
1742 sh_unmap_domain_page(l4e);
1743 if ( is_pv_32on64_vcpu(v) )
1745 // Install a monitor l2 table in slot 3 of the l3 table.
1746 // This is used for all Xen entries.
1747 mfn_t m2mfn;
1748 l3_pgentry_t *l3e;
1749 m2mfn = shadow_alloc(d, SH_type_monitor_table, 0);
1750 mfn_to_page(m2mfn)->shadow_flags = 2;
1751 l3e = sh_map_domain_page(m3mfn);
1752 l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT);
1753 sh_install_xen_entries_in_l2h(v, m2mfn);
1754 sh_unmap_domain_page(l3e);
1757 #endif /* SHADOW_PAGING_LEVELS < 4 */
1758 return m4mfn;
1761 #elif CONFIG_PAGING_LEVELS == 3
1764 mfn_t m3mfn, m2mfn;
1765 l3_pgentry_t *l3e;
1766 l2_pgentry_t *l2e;
1767 int i;
1769 m3mfn = shadow_alloc(d, SH_type_monitor_table, 0);
1770 /* Remember the level of this table */
1771 mfn_to_page(m3mfn)->shadow_flags = 3;
1773 // Install a monitor l2 table in slot 3 of the l3 table.
1774 // This is used for all Xen entries, including linear maps
1775 m2mfn = shadow_alloc(d, SH_type_monitor_table, 0);
1776 mfn_to_page(m2mfn)->shadow_flags = 2;
1777 l3e = sh_map_domain_page(m3mfn);
1778 l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT);
1779 sh_install_xen_entries_in_l2h(v, m2mfn);
1780 /* Install the monitor's own linear map */
1781 l2e = sh_map_domain_page(m2mfn);
1782 for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
1783 l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
1784 (l3e_get_flags(l3e[i]) & _PAGE_PRESENT)
1785 ? l2e_from_pfn(l3e_get_pfn(l3e[i]), __PAGE_HYPERVISOR)
1786 : l2e_empty();
1787 sh_unmap_domain_page(l2e);
1788 sh_unmap_domain_page(l3e);
1790 SHADOW_PRINTK("new monitor table: %#lx\n", mfn_x(m3mfn));
1791 return m3mfn;
1794 #elif CONFIG_PAGING_LEVELS == 2
1797 mfn_t m2mfn;
1798 m2mfn = shadow_alloc(d, SH_type_monitor_table, 0);
1799 sh_install_xen_entries_in_l2(v, m2mfn, m2mfn);
1800 /* Remember the level of this table */
1801 mfn_to_page(m2mfn)->shadow_flags = 2;
1802 return m2mfn;
1805 #else
1806 #error this should not happen
1807 #endif /* CONFIG_PAGING_LEVELS */
1809 #endif /* SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS */
1811 /**************************************************************************/
1812 /* These functions also take a virtual address and return the level-N
1813 * shadow table mfn and entry, but they create the shadow pagetables if
1814 * they are needed. The fetch-type argument tells us whether we are handling
1815 * a demand fault (so we know what to do about accessed bits &c).
1816 * If the necessary tables are not present in the guest, they return NULL. */
1818 /* N.B. The use of GUEST_PAGING_LEVELS here is correct. If the shadow has
1819 * more levels than the guest, the upper levels are always fixed and do not
1820 * reflect any information from the guest, so we do not use these functions
1821 * to access them. */
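/* Typical use (a sketch of what the demand-fault path below does):
 *
 *     mfn_t sl1mfn;
 *     shadow_l1e_t *ptr_sl1e =
 *         shadow_get_and_create_l1e(v, &gw, &sl1mfn, ft_demand_write);
 *
 * which walks down via shadow_get_and_create_l2e/l3e/l4e, creating any
 * missing intermediate shadows on the way; see sh_page_fault(). */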
1823 #if GUEST_PAGING_LEVELS >= 4
1824 static shadow_l4e_t * shadow_get_and_create_l4e(struct vcpu *v,
1825 walk_t *gw,
1826 mfn_t *sl4mfn)
1828 /* There is always a shadow of the top level table. Get it. */
1829 *sl4mfn = pagetable_get_mfn(v->arch.shadow_table[0]);
1830 /* Reading the top level table is always valid. */
1831 return sh_linear_l4_table(v) + shadow_l4_linear_offset(gw->va);
1834 static shadow_l3e_t * shadow_get_and_create_l3e(struct vcpu *v,
1835 walk_t *gw,
1836 mfn_t *sl3mfn,
1837 fetch_type_t ft)
1839 mfn_t sl4mfn;
1840 shadow_l4e_t *sl4e;
1841 if ( !mfn_valid(gw->l3mfn) ) return NULL; /* No guest page. */
1842 /* Get the l4e */
1843 sl4e = shadow_get_and_create_l4e(v, gw, &sl4mfn);
1844 ASSERT(sl4e != NULL);
1845 if ( shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT )
1847 *sl3mfn = shadow_l4e_get_mfn(*sl4e);
1848 ASSERT(mfn_valid(*sl3mfn));
1850 else
1852 int r;
1853 shadow_l4e_t new_sl4e;
1854 /* No l3 shadow installed: find and install it. */
1855 *sl3mfn = get_shadow_status(v, gw->l3mfn, SH_type_l3_shadow);
1856 if ( !mfn_valid(*sl3mfn) )
1858 /* No l3 shadow of this page exists at all: make one. */
1859 *sl3mfn = sh_make_shadow(v, gw->l3mfn, SH_type_l3_shadow);
1861 /* Install the new sl3 table in the sl4e */
1862 l4e_propagate_from_guest(v, gw->l4e, gw->l4mfn,
1863 *sl3mfn, &new_sl4e, ft);
1864 r = shadow_set_l4e(v, sl4e, new_sl4e, sl4mfn);
1865 ASSERT((r & SHADOW_SET_FLUSH) == 0);
1866 if ( r & SHADOW_SET_ERROR )
1867 return NULL;
1869 /* Now follow it down a level. Guaranteed to succeed. */
1870 return sh_linear_l3_table(v) + shadow_l3_linear_offset(gw->va);
1872 #endif /* GUEST_PAGING_LEVELS >= 4 */
1875 static shadow_l2e_t * shadow_get_and_create_l2e(struct vcpu *v,
1876 walk_t *gw,
1877 mfn_t *sl2mfn,
1878 fetch_type_t ft)
1880 #if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
1881 mfn_t sl3mfn = _mfn(INVALID_MFN);
1882 shadow_l3e_t *sl3e;
1883 if ( !mfn_valid(gw->l2mfn) ) return NULL; /* No guest page. */
1884 /* Get the l3e */
1885 sl3e = shadow_get_and_create_l3e(v, gw, &sl3mfn, ft);
1886 if ( sl3e == NULL ) return NULL;
1887 if ( shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT )
1889 *sl2mfn = shadow_l3e_get_mfn(*sl3e);
1890 ASSERT(mfn_valid(*sl2mfn));
1892 else
1894 int r;
1895 shadow_l3e_t new_sl3e;
1896 unsigned int t = SH_type_l2_shadow;
1898 /* Tag compat L2 containing hypervisor (m2p) mappings */
1899 if ( is_pv_32on64_domain(v->domain) &&
1900 guest_l4_table_offset(gw->va) == 0 &&
1901 guest_l3_table_offset(gw->va) == 3 )
1902 t = SH_type_l2h_shadow;
1904 /* No l2 shadow installed: find and install it. */
1905 *sl2mfn = get_shadow_status(v, gw->l2mfn, t);
1906 if ( !mfn_valid(*sl2mfn) )
1908 /* No l2 shadow of this page exists at all: make one. */
1909 *sl2mfn = sh_make_shadow(v, gw->l2mfn, t);
1911 /* Install the new sl2 table in the sl3e */
1912 l3e_propagate_from_guest(v, gw->l3e, gw->l3mfn,
1913 *sl2mfn, &new_sl3e, ft);
1914 r = shadow_set_l3e(v, sl3e, new_sl3e, sl3mfn);
1915 ASSERT((r & SHADOW_SET_FLUSH) == 0);
1916 if ( r & SHADOW_SET_ERROR )
1917 return NULL;
1919 /* Now follow it down a level. Guaranteed to succeed. */
1920 return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
1921 #elif GUEST_PAGING_LEVELS == 3 /* PAE... */
1922 /* We never demand-shadow PAE l3es: they are only created in
1923 * sh_update_cr3(). Check if the relevant sl3e is present. */
1924 shadow_l3e_t *sl3e = ((shadow_l3e_t *)&v->arch.paging.shadow.l3table)
1925 + shadow_l3_linear_offset(gw->va);
1926 if ( !(shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) )
1927 return NULL;
1928 *sl2mfn = shadow_l3e_get_mfn(*sl3e);
1929 ASSERT(mfn_valid(*sl2mfn));
1930 return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
1931 #else /* 32bit... */
1932 /* There is always a shadow of the top level table. Get it. */
1933 *sl2mfn = pagetable_get_mfn(v->arch.shadow_table[0]);
1934 /* This next line is important: the guest l2 has a 16k
1935 * shadow, so we need to return the right mfn of the four. This
1936 * call will set it for us as a side-effect. */
1937 (void) shadow_l2_index(sl2mfn, guest_index(gw->l2e));
1938 /* Reading the top level table is always valid. */
1939 return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
1940 #endif
1944 static shadow_l1e_t * shadow_get_and_create_l1e(struct vcpu *v,
1945 walk_t *gw,
1946 mfn_t *sl1mfn,
1947 fetch_type_t ft)
1949 mfn_t sl2mfn;
1950 shadow_l2e_t *sl2e;
1952 /* Get the l2e */
1953 sl2e = shadow_get_and_create_l2e(v, gw, &sl2mfn, ft);
1954 if ( sl2e == NULL ) return NULL;
1955 /* Install the sl1 in the l2e if it wasn't there or if we need to
1956 * re-do it to fix a PSE dirty bit. */
1957 if ( shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT
1958 && likely(ft != ft_demand_write
1959 || (guest_l2e_get_flags(*gw->l2e) & _PAGE_DIRTY)
1960 || !(guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE)) )
1962 *sl1mfn = shadow_l2e_get_mfn(*sl2e);
1963 ASSERT(mfn_valid(*sl1mfn));
1965 else
1967 shadow_l2e_t new_sl2e;
1968 int r, flags = guest_l2e_get_flags(*gw->l2e);
1969 /* No l1 shadow installed: find and install it. */
1970 if ( !(flags & _PAGE_PRESENT) )
1971 return NULL; /* No guest page. */
1972 if ( guest_supports_superpages(v) && (flags & _PAGE_PSE) )
1974 /* Splintering a superpage */
1975 gfn_t l2gfn = guest_l2e_get_gfn(*gw->l2e);
1976 *sl1mfn = get_fl1_shadow_status(v, l2gfn);
1977 if ( !mfn_valid(*sl1mfn) )
1979 /* No fl1 shadow of this superpage exists at all: make one. */
1980 *sl1mfn = make_fl1_shadow(v, l2gfn);
1983 else
1985 /* Shadowing an actual guest l1 table */
1986 if ( !mfn_valid(gw->l2mfn) ) return NULL; /* No guest page. */
1987 *sl1mfn = get_shadow_status(v, gw->l1mfn, SH_type_l1_shadow);
1988 if ( !mfn_valid(*sl1mfn) )
1990 /* No l1 shadow of this page exists at all: make one. */
1991 *sl1mfn = sh_make_shadow(v, gw->l1mfn, SH_type_l1_shadow);
1994 /* Install the new sl1 table in the sl2e */
1995 l2e_propagate_from_guest(v, gw->l2e, gw->l2mfn,
1996 *sl1mfn, &new_sl2e, ft);
1997 r = shadow_set_l2e(v, sl2e, new_sl2e, sl2mfn);
1998 ASSERT((r & SHADOW_SET_FLUSH) == 0);
1999 if ( r & SHADOW_SET_ERROR )
2000 return NULL;
2001 /* This next line is important: in 32-on-PAE and 32-on-64 modes,
2002 * the guest l1 table has an 8k shadow, and we need to return
2003 * the right mfn of the pair. This call will set it for us as a
2004 * side-effect. (In all other cases, it's a no-op and will be
2005 * compiled out.) */
2006 (void) shadow_l1_index(sl1mfn, guest_l1_table_offset(gw->va));
2008 /* Now follow it down a level. Guaranteed to succeed. */
2009 return sh_linear_l1_table(v) + shadow_l1_linear_offset(gw->va);
2014 /**************************************************************************/
2015 /* Destructors for shadow tables:
2016 * Unregister the shadow, decrement refcounts of any entries present in it,
2017 * and release the memory.
2019 * N.B. These destructors do not clear the contents of the shadows.
2020 * This allows us to delay TLB shootdowns until the page is being reused.
2021 * See shadow_alloc() and shadow_free() for how this is handled.
2022 */
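/* All of the destructors below follow the same shape (a summary of the code
 * that follows, shown here for orientation):
 *     delete_shadow_status(v, gmfn, t, smfn);    -- unhook from the hash
 *     shadow_demote(v, gmfn, t);                 -- guest page no longer shadowed
 *     SHADOW_FOREACH_LxE(...) sh_put_ref(...);   -- drop refs held by entries
 *     shadow_free(v->domain, smfn);              -- return the page to the pool
 */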
2024 #if GUEST_PAGING_LEVELS >= 4
2025 void sh_destroy_l4_shadow(struct vcpu *v, mfn_t smfn)
2027 shadow_l4e_t *sl4e;
2028 u32 t = mfn_to_shadow_page(smfn)->type;
2029 mfn_t gmfn, sl4mfn;
2031 SHADOW_DEBUG(DESTROY_SHADOW,
2032 "%s(%05lx)\n", __func__, mfn_x(smfn));
2033 ASSERT(t == SH_type_l4_shadow);
2035 /* Record that the guest page isn't shadowed any more (in this type) */
2036 gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
2037 delete_shadow_status(v, gmfn, t, smfn);
2038 shadow_demote(v, gmfn, t);
2039 /* Decrement refcounts of all the old entries */
2040 sl4mfn = smfn;
2041 SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, 0, v->domain, {
2042 if ( shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT )
2044 sh_put_ref(v, shadow_l4e_get_mfn(*sl4e),
2045 (((paddr_t)mfn_x(sl4mfn)) << PAGE_SHIFT)
2046 | ((unsigned long)sl4e & ~PAGE_MASK));
2048 });
2050 /* Put the memory back in the pool */
2051 shadow_free(v->domain, smfn);
2054 void sh_destroy_l3_shadow(struct vcpu *v, mfn_t smfn)
2056 shadow_l3e_t *sl3e;
2057 u32 t = mfn_to_shadow_page(smfn)->type;
2058 mfn_t gmfn, sl3mfn;
2060 SHADOW_DEBUG(DESTROY_SHADOW,
2061 "%s(%05lx)\n", __func__, mfn_x(smfn));
2062 ASSERT(t == SH_type_l3_shadow);
2064 /* Record that the guest page isn't shadowed any more (in this type) */
2065 gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
2066 delete_shadow_status(v, gmfn, t, smfn);
2067 shadow_demote(v, gmfn, t);
2069 /* Decrement refcounts of all the old entries */
2070 sl3mfn = smfn;
2071 SHADOW_FOREACH_L3E(sl3mfn, sl3e, 0, 0, {
2072 if ( shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT )
2073 sh_put_ref(v, shadow_l3e_get_mfn(*sl3e),
2074 (((paddr_t)mfn_x(sl3mfn)) << PAGE_SHIFT)
2075 | ((unsigned long)sl3e & ~PAGE_MASK));
2076 });
2078 /* Put the memory back in the pool */
2079 shadow_free(v->domain, smfn);
2081 #endif /* GUEST_PAGING_LEVELS >= 4 */
2084 void sh_destroy_l2_shadow(struct vcpu *v, mfn_t smfn)
2086 shadow_l2e_t *sl2e;
2087 u32 t = mfn_to_shadow_page(smfn)->type;
2088 mfn_t gmfn, sl2mfn;
2090 SHADOW_DEBUG(DESTROY_SHADOW,
2091 "%s(%05lx)\n", __func__, mfn_x(smfn));
2093 #if GUEST_PAGING_LEVELS >= 3
2094 ASSERT(t == SH_type_l2_shadow || t == SH_type_l2h_shadow);
2095 #else
2096 ASSERT(t == SH_type_l2_shadow);
2097 #endif
2099 /* Record that the guest page isn't shadowed any more (in this type) */
2100 gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
2101 delete_shadow_status(v, gmfn, t, smfn);
2102 shadow_demote(v, gmfn, t);
2104 /* Decrement refcounts of all the old entries */
2105 sl2mfn = smfn;
2106 SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, v->domain, {
2107 if ( shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT )
2108 sh_put_ref(v, shadow_l2e_get_mfn(*sl2e),
2109 (((paddr_t)mfn_x(sl2mfn)) << PAGE_SHIFT)
2110 | ((unsigned long)sl2e & ~PAGE_MASK));
2111 });
2113 /* Put the memory back in the pool */
2114 shadow_free(v->domain, smfn);
2117 void sh_destroy_l1_shadow(struct vcpu *v, mfn_t smfn)
2119 struct domain *d = v->domain;
2120 shadow_l1e_t *sl1e;
2121 u32 t = mfn_to_shadow_page(smfn)->type;
2123 SHADOW_DEBUG(DESTROY_SHADOW,
2124 "%s(%05lx)\n", __func__, mfn_x(smfn));
2125 ASSERT(t == SH_type_l1_shadow || t == SH_type_fl1_shadow);
2127 /* Record that the guest page isn't shadowed any more (in this type) */
2128 if ( t == SH_type_fl1_shadow )
2130 gfn_t gfn = _gfn(mfn_to_shadow_page(smfn)->backpointer);
2131 delete_fl1_shadow_status(v, gfn, smfn);
2133 else
2135 mfn_t gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
2136 delete_shadow_status(v, gmfn, t, smfn);
2137 shadow_demote(v, gmfn, t);
2140 if ( shadow_mode_refcounts(d) )
2142 /* Decrement refcounts of all the old entries */
2143 mfn_t sl1mfn = smfn;
2144 SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
2145 if ( (shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT)
2146 && !sh_l1e_is_magic(*sl1e) )
2147 shadow_put_page_from_l1e(*sl1e, d);
2148 });
2151 /* Put the memory back in the pool */
2152 shadow_free(v->domain, smfn);
2155 #if SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS
2156 void sh_destroy_monitor_table(struct vcpu *v, mfn_t mmfn)
2158 struct domain *d = v->domain;
2159 ASSERT(mfn_to_shadow_page(mmfn)->type == SH_type_monitor_table);
2161 #if (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS != 4)
2162 /* Need to destroy the l3 monitor page in slot 0 too */
2164 mfn_t m3mfn;
2165 l4_pgentry_t *l4e = sh_map_domain_page(mmfn);
2166 ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT);
2167 m3mfn = _mfn(l4e_get_pfn(l4e[0]));
2168 if ( is_pv_32on64_vcpu(v) )
2170 /* Need to destroy the l2 monitor page in slot 3 too */
2171 l3_pgentry_t *l3e = sh_map_domain_page(m3mfn);
2172 ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
2173 shadow_free(d, _mfn(l3e_get_pfn(l3e[3])));
2174 sh_unmap_domain_page(l3e);
2176 shadow_free(d, m3mfn);
2177 sh_unmap_domain_page(l4e);
2179 #elif CONFIG_PAGING_LEVELS == 3
2180 /* Need to destroy the l2 monitor page in slot 3 too */
2182 l3_pgentry_t *l3e = sh_map_domain_page(mmfn);
2183 ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
2184 shadow_free(d, _mfn(l3e_get_pfn(l3e[3])));
2185 sh_unmap_domain_page(l3e);
2187 #endif
2189 /* Put the memory back in the pool */
2190 shadow_free(d, mmfn);
2192 #endif
2194 /**************************************************************************/
2195 /* Functions to destroy non-Xen mappings in a pagetable hierarchy.
2196 * These are called from common code when we are running out of shadow
2197 * memory, and unpinning all the top-level shadows hasn't worked.
2199 * This implementation is pretty crude and slow, but we hope that it won't
2200 * be called very often. */
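/* Summary of the code below: the unhook functions simply write empty entries
 * over every guest-visible slot of a top-level shadow with
 * shadow_set_l2e/l4e(), which drops the references those slots held and lets
 * the lower-level shadows be reclaimed by the usual refcounting. */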
2202 #if GUEST_PAGING_LEVELS == 2
2204 void sh_unhook_32b_mappings(struct vcpu *v, mfn_t sl2mfn)
2206 shadow_l2e_t *sl2e;
2207 SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, v->domain, {
2208 (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
2209 });
2212 #elif GUEST_PAGING_LEVELS == 3
2214 void sh_unhook_pae_mappings(struct vcpu *v, mfn_t sl2mfn)
2215 /* Walk a PAE l2 shadow, unhooking entries from all the subshadows */
2217 shadow_l2e_t *sl2e;
2218 SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, v->domain, {
2219 (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
2220 });
2223 #elif GUEST_PAGING_LEVELS == 4
2225 void sh_unhook_64b_mappings(struct vcpu *v, mfn_t sl4mfn)
2227 shadow_l4e_t *sl4e;
2228 SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, 0, v->domain, {
2229 (void) shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn);
2230 });
2233 #endif
2235 /**************************************************************************/
2236 /* Internal translation functions.
2237 * These functions require a pointer to the shadow entry that will be updated.
2238 */
2240 /* These functions take a new guest entry, translate it to shadow and write
2241 * the shadow entry.
2243 * They return the same bitmaps as the shadow_set_lXe() functions.
2244 */
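/* Hedged note on the return values: callers outside this file are assumed to
 * act on the SHADOW_SET_* bits, e.g. flushing TLBs when SHADOW_SET_FLUSH is
 * set; within this file SHADOW_SET_ERROR is treated as "the new entry could
 * not be shadowed" (see the reserved-slot checks below). */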
2246 #if GUEST_PAGING_LEVELS >= 4
2247 static int validate_gl4e(struct vcpu *v, void *new_ge, mfn_t sl4mfn, void *se)
2249 shadow_l4e_t new_sl4e;
2250 guest_l4e_t *new_gl4e = new_ge;
2251 shadow_l4e_t *sl4p = se;
2252 mfn_t sl3mfn = _mfn(INVALID_MFN);
2253 struct domain *d = v->domain;
2254 p2m_type_t p2mt;
2255 int result = 0;
2257 perfc_incr(shadow_validate_gl4e_calls);
2259 if ( guest_l4e_get_flags(*new_gl4e) & _PAGE_PRESENT )
2261 gfn_t gl3gfn = guest_l4e_get_gfn(*new_gl4e);
2262 mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn, &p2mt);
2263 if ( p2m_is_ram(p2mt) )
2264 sl3mfn = get_shadow_status(v, gl3mfn, SH_type_l3_shadow);
2265 else
2266 result |= SHADOW_SET_ERROR;
2268 l4e_propagate_from_guest(v, new_gl4e, _mfn(INVALID_MFN),
2269 sl3mfn, &new_sl4e, ft_prefetch);
2271 // check for updates to xen reserved slots
2272 if ( !shadow_mode_external(d) )
2274 int shadow_index = (((unsigned long)sl4p & ~PAGE_MASK) /
2275 sizeof(shadow_l4e_t));
2276 int reserved_xen_slot = !is_guest_l4_slot(d, shadow_index);
2278 if ( unlikely(reserved_xen_slot) )
2280 // attempt by the guest to write to a xen reserved slot
2281 //
2282 SHADOW_PRINTK("%s out-of-range update "
2283 "sl4mfn=%05lx index=0x%x val=%" SH_PRI_pte "\n",
2284 __func__, mfn_x(sl4mfn), shadow_index, new_sl4e.l4);
2285 if ( shadow_l4e_get_flags(new_sl4e) & _PAGE_PRESENT )
2287 SHADOW_ERROR("out-of-range l4e update\n");
2288 result |= SHADOW_SET_ERROR;
2291 // do not call shadow_set_l4e...
2292 return result;
2296 result |= shadow_set_l4e(v, sl4p, new_sl4e, sl4mfn);
2297 return result;
2301 static int validate_gl3e(struct vcpu *v, void *new_ge, mfn_t sl3mfn, void *se)
2303 shadow_l3e_t new_sl3e;
2304 guest_l3e_t *new_gl3e = new_ge;
2305 shadow_l3e_t *sl3p = se;
2306 mfn_t sl2mfn = _mfn(INVALID_MFN);
2307 p2m_type_t p2mt;
2308 int result = 0;
2310 perfc_incr(shadow_validate_gl3e_calls);
2312 if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
2314 gfn_t gl2gfn = guest_l3e_get_gfn(*new_gl3e);
2315 mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn, &p2mt);
2316 if ( p2m_is_ram(p2mt) )
2317 sl2mfn = get_shadow_status(v, gl2mfn, SH_type_l2_shadow);
2318 else
2319 result |= SHADOW_SET_ERROR;
2321 l3e_propagate_from_guest(v, new_gl3e, _mfn(INVALID_MFN),
2322 sl2mfn, &new_sl3e, ft_prefetch);
2323 result |= shadow_set_l3e(v, sl3p, new_sl3e, sl3mfn);
2325 return result;
2327 #endif // GUEST_PAGING_LEVELS >= 4
2329 static int validate_gl2e(struct vcpu *v, void *new_ge, mfn_t sl2mfn, void *se)
2331 shadow_l2e_t new_sl2e;
2332 guest_l2e_t *new_gl2e = new_ge;
2333 shadow_l2e_t *sl2p = se;
2334 mfn_t sl1mfn = _mfn(INVALID_MFN);
2335 p2m_type_t p2mt;
2336 int result = 0;
2338 perfc_incr(shadow_validate_gl2e_calls);
2340 if ( guest_l2e_get_flags(*new_gl2e) & _PAGE_PRESENT )
2342 gfn_t gl1gfn = guest_l2e_get_gfn(*new_gl2e);
2343 if ( guest_supports_superpages(v) &&
2344 (guest_l2e_get_flags(*new_gl2e) & _PAGE_PSE) )
2346 // superpage -- need to look up the shadow L1 which holds the
2347 // splinters...
2348 sl1mfn = get_fl1_shadow_status(v, gl1gfn);
2349 #if 0
2350 // XXX - it's possible that we want to do some kind of prefetch
2351 // for superpage fl1's here, but this is *not* on the demand path,
2352 // so we'll hold off trying that for now...
2353 //
2354 if ( !mfn_valid(sl1mfn) )
2355 sl1mfn = make_fl1_shadow(v, gl1gfn);
2356 #endif
2358 else
2360 mfn_t gl1mfn = gfn_to_mfn(v->domain, gl1gfn, &p2mt);
2361 if ( p2m_is_ram(p2mt) )
2362 sl1mfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow);
2363 else
2364 result |= SHADOW_SET_ERROR;
2367 l2e_propagate_from_guest(v, new_gl2e, _mfn(INVALID_MFN),
2368 sl1mfn, &new_sl2e, ft_prefetch);
2370 // check for updates to xen reserved slots in PV guests...
2371 // XXX -- need to revisit this for PV 3-on-4 guests.
2372 //
2373 #if SHADOW_PAGING_LEVELS < 4
2374 #if CONFIG_PAGING_LEVELS == SHADOW_PAGING_LEVELS
2375 if ( !shadow_mode_external(v->domain) )
2377 int shadow_index = (((unsigned long)sl2p & ~PAGE_MASK) /
2378 sizeof(shadow_l2e_t));
2379 int reserved_xen_slot;
2381 #if SHADOW_PAGING_LEVELS == 3
2382 reserved_xen_slot =
2383 ((mfn_to_shadow_page(sl2mfn)->type == SH_type_l2h_pae_shadow) &&
2384 (shadow_index
2385 >= (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1))));
2386 #else /* SHADOW_PAGING_LEVELS == 2 */
2387 reserved_xen_slot = (shadow_index >= L2_PAGETABLE_FIRST_XEN_SLOT);
2388 #endif
2390 if ( unlikely(reserved_xen_slot) )
2392 // attempt by the guest to write to a xen reserved slot
2393 //
2394 SHADOW_PRINTK("%s out-of-range update "
2395 "sl2mfn=%05lx index=0x%x val=%" SH_PRI_pte "\n",
2396 __func__, mfn_x(sl2mfn), shadow_index, new_sl2e.l2);
2397 if ( shadow_l2e_get_flags(new_sl2e) & _PAGE_PRESENT )
2399 SHADOW_ERROR("out-of-range l2e update\n");
2400 result |= SHADOW_SET_ERROR;
2403 // do not call shadow_set_l2e...
2404 return result;
2407 #endif /* CONFIG_PAGING_LEVELS == SHADOW_PAGING_LEVELS */
2408 #endif /* SHADOW_PAGING_LEVELS < 4 */
2410 result |= shadow_set_l2e(v, sl2p, new_sl2e, sl2mfn);
2412 return result;
2415 static int validate_gl1e(struct vcpu *v, void *new_ge, mfn_t sl1mfn, void *se)
2417 shadow_l1e_t new_sl1e;
2418 guest_l1e_t *new_gl1e = new_ge;
2419 shadow_l1e_t *sl1p = se;
2420 gfn_t gfn;
2421 mfn_t gmfn;
2422 p2m_type_t p2mt;
2423 int result = 0;
2425 perfc_incr(shadow_validate_gl1e_calls);
2427 gfn = guest_l1e_get_gfn(*new_gl1e);
2428 gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
2430 l1e_propagate_from_guest(v, new_gl1e, _mfn(INVALID_MFN), gmfn, &new_sl1e,
2431 ft_prefetch, p2mt);
2433 result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
2434 return result;
2438 /**************************************************************************/
2439 /* Functions which translate and install the shadows of arbitrary guest
2440 * entries that we have just seen the guest write. */
2443 static inline int
2444 sh_map_and_validate(struct vcpu *v, mfn_t gmfn,
2445 void *new_gp, u32 size, u32 sh_type,
2446 u32 (*shadow_index)(mfn_t *smfn, u32 idx),
2447 int (*validate_ge)(struct vcpu *v, void *ge,
2448 mfn_t smfn, void *se))
2449 /* Generic function for mapping and validating. */
2451 mfn_t smfn, smfn2, map_mfn;
2452 shadow_l1e_t *sl1p;
2453 u32 shadow_idx, guest_idx;
2454 int result = 0;
2456 /* Align address and size to guest entry boundaries */
2457 size += (unsigned long)new_gp & (sizeof (guest_l1e_t) - 1);
2458 new_gp = (void *)((unsigned long)new_gp & ~(sizeof (guest_l1e_t) - 1));
2459 size = (size + sizeof (guest_l1e_t) - 1) & ~(sizeof (guest_l1e_t) - 1);
2460 ASSERT(size + (((unsigned long)new_gp) & ~PAGE_MASK) <= PAGE_SIZE);
2462 /* Map the shadow page */
2463 smfn = get_shadow_status(v, gmfn, sh_type);
2464 ASSERT(mfn_valid(smfn)); /* Otherwise we would not have been called */
2465 guest_idx = guest_index(new_gp);
2466 map_mfn = smfn;
2467 shadow_idx = shadow_index(&map_mfn, guest_idx);
2468 sl1p = map_shadow_page(map_mfn);
2470 /* Validate one entry at a time */
2471 while ( size )
2473 smfn2 = smfn;
2474 guest_idx = guest_index(new_gp);
2475 shadow_idx = shadow_index(&smfn2, guest_idx);
2476 if ( mfn_x(smfn2) != mfn_x(map_mfn) )
2478 /* We have moved to another page of the shadow */
2479 map_mfn = smfn2;
2480 unmap_shadow_page(sl1p);
2481 sl1p = map_shadow_page(map_mfn);
2483 result |= validate_ge(v,
2484 new_gp,
2485 map_mfn,
2486 &sl1p[shadow_idx]);
2487 size -= sizeof(guest_l1e_t);
2488 new_gp += sizeof(guest_l1e_t);
2490 unmap_shadow_page(sl1p);
2491 return result;
2495 int
2496 sh_map_and_validate_gl4e(struct vcpu *v, mfn_t gl4mfn,
2497 void *new_gl4p, u32 size)
2499 #if GUEST_PAGING_LEVELS >= 4
2500 return sh_map_and_validate(v, gl4mfn, new_gl4p, size,
2501 SH_type_l4_shadow,
2502 shadow_l4_index,
2503 validate_gl4e);
2504 #else // ! GUEST_PAGING_LEVELS >= 4
2505 SHADOW_ERROR("called in wrong paging mode!\n");
2506 BUG();
2507 return 0;
2508 #endif
2511 int
2512 sh_map_and_validate_gl3e(struct vcpu *v, mfn_t gl3mfn,
2513 void *new_gl3p, u32 size)
2515 #if GUEST_PAGING_LEVELS >= 4
2516 return sh_map_and_validate(v, gl3mfn, new_gl3p, size,
2517 SH_type_l3_shadow,
2518 shadow_l3_index,
2519 validate_gl3e);
2520 #else // ! GUEST_PAGING_LEVELS >= 4
2521 SHADOW_ERROR("called in wrong paging mode!\n");
2522 BUG();
2523 return 0;
2524 #endif
2527 int
2528 sh_map_and_validate_gl2e(struct vcpu *v, mfn_t gl2mfn,
2529 void *new_gl2p, u32 size)
2531 return sh_map_and_validate(v, gl2mfn, new_gl2p, size,
2532 SH_type_l2_shadow,
2533 shadow_l2_index,
2534 validate_gl2e);
2537 int
2538 sh_map_and_validate_gl2he(struct vcpu *v, mfn_t gl2mfn,
2539 void *new_gl2p, u32 size)
2541 #if GUEST_PAGING_LEVELS >= 3
2542 return sh_map_and_validate(v, gl2mfn, new_gl2p, size,
2543 SH_type_l2h_shadow,
2544 shadow_l2_index,
2545 validate_gl2e);
2546 #else /* Non-PAE guests don't have different kinds of l2 table */
2547 SHADOW_ERROR("called in wrong paging mode!\n");
2548 BUG();
2549 return 0;
2550 #endif
2553 int
2554 sh_map_and_validate_gl1e(struct vcpu *v, mfn_t gl1mfn,
2555 void *new_gl1p, u32 size)
2557 return sh_map_and_validate(v, gl1mfn, new_gl1p, size,
2558 SH_type_l1_shadow,
2559 shadow_l1_index,
2560 validate_gl1e);
2564 /**************************************************************************/
2565 /* Optimization: If we see two emulated writes of zeros to the same
2566 * page-table without another kind of page fault in between, we guess
2567 * that this is a batch of changes (for process destruction) and
2568 * unshadow the page so we don't take a pagefault on every entry. This
2569 * should also make finding writeable mappings of pagetables much
2570 * easier. */
2572 /* Look to see if this is the second emulated write in a row to this
2573 * page, and unshadow/unhook if it is */
2574 static inline void check_for_early_unshadow(struct vcpu *v, mfn_t gmfn)
2576 #if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
2577 if ( v->arch.paging.shadow.last_emulated_mfn == mfn_x(gmfn) &&
2578 sh_mfn_is_a_page_table(gmfn) )
2580 u32 flags = mfn_to_page(gmfn)->shadow_flags;
2581 if ( !(flags & (SHF_L2_32|SHF_L2_PAE|SHF_L2H_PAE|SHF_L4_64)) )
2583 perfc_incr(shadow_early_unshadow);
2584 sh_remove_shadows(v, gmfn, 1, 0 /* Fast, can fail to unshadow */ );
2587 v->arch.paging.shadow.last_emulated_mfn = mfn_x(gmfn);
2588 #endif
2591 /* Stop counting towards early unshadows, as we've seen a real page fault */
2592 static inline void reset_early_unshadow(struct vcpu *v)
2594 #if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
2595 v->arch.paging.shadow.last_emulated_mfn = INVALID_MFN;
2596 #endif
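/* Intended pairing of the two helpers above (a sketch; the emulated
 * pagetable-write path elsewhere in this file is assumed to be the caller
 * of the first):
 *
 *     check_for_early_unshadow(v, gmfn);   after each emulated PT write
 *     reset_early_unshadow(v);             on any other kind of fault
 *
 * so only an uninterrupted run of emulated writes triggers the unshadow;
 * sh_page_fault() below calls reset_early_unshadow() on its other paths. */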
2601 /**************************************************************************/
2602 /* Optimization: Prefetch multiple L1 entries. This is called after we have
2603 * demand-faulted a shadow l1e in the fault handler, to see if it's
2604 * worth fetching some more.
2605 */
2607 #if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH
2609 /* XXX magic number */
2610 #define PREFETCH_DISTANCE 32
2612 static void sh_prefetch(struct vcpu *v, walk_t *gw,
2613 shadow_l1e_t *ptr_sl1e, mfn_t sl1mfn)
2615 int i, dist;
2616 gfn_t gfn;
2617 mfn_t gmfn;
2618 guest_l1e_t gl1e;
2619 shadow_l1e_t sl1e;
2620 u32 gflags;
2621 p2m_type_t p2mt;
2623 /* Prefetch no further than the end of the _shadow_ l1 MFN */
2624 dist = (PAGE_SIZE - ((unsigned long)ptr_sl1e & ~PAGE_MASK)) / sizeof sl1e;
2625 /* And no more than a maximum fetches-per-fault */
2626 if ( dist > PREFETCH_DISTANCE )
2627 dist = PREFETCH_DISTANCE;
2629 for ( i = 1; i < dist ; i++ )
2631 /* No point in prefetching if there's already a shadow */
2632 if ( ptr_sl1e[i].l1 != 0 )
2633 break;
2635 if ( gw->l1e )
2637 /* Normal guest page; grab the next guest entry */
2638 gl1e = gw->l1e[i];
2639 /* Not worth continuing if we hit an entry that will need another
2640 * fault for A/D-bit propagation anyway */
2641 gflags = guest_l1e_get_flags(gl1e);
2642 if ( (gflags & _PAGE_PRESENT)
2643 && (!(gflags & _PAGE_ACCESSED)
2644 || ((gflags & _PAGE_RW) && !(gflags & _PAGE_DIRTY))) )
2645 break;
2647 else
2649 /* Fragmented superpage, unless we've been called wrongly */
2650 ASSERT(guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE);
2651 /* Increment the l1e's GFN by the right number of guest pages */
2652 gl1e = guest_l1e_from_gfn(
2653 _gfn(gfn_x(guest_l1e_get_gfn(gw->eff_l1e)) + i),
2654 guest_l1e_get_flags(gw->eff_l1e));
2657 /* Look at the gfn that the l1e is pointing at */
2658 gfn = guest_l1e_get_gfn(gl1e);
2659 gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
2661 /* Propagate the entry. Safe to use a pointer to our local
2662 * gl1e, since this is not a demand-fetch so there will be no
2663 * write-back to the guest. */
2664 l1e_propagate_from_guest(v, &gl1e, _mfn(INVALID_MFN),
2665 gmfn, &sl1e, ft_prefetch, p2mt);
2666 (void) shadow_set_l1e(v, ptr_sl1e + i, sl1e, sl1mfn);
2670 #endif /* SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH */
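/* Worked example of the distance clamp above, assuming 8-byte shadow l1es
 * (PAE or 64-bit shadows): a fault on the entry at page offset 0xff0 gives
 * dist = (4096 - 0xff0) / 8 = 2, so at most one extra entry is prefetched,
 * while a fault near the start of the page is capped at PREFETCH_DISTANCE. */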
2673 /**************************************************************************/
2674 /* Entry points into the shadow code */
2676 /* Called from pagefault handler in Xen, and from the HVM trap handlers
2677 * for pagefaults. Returns 1 if this fault was an artefact of the
2678 * shadow code (and the guest should retry) or 0 if it is not (and the
2679 * fault should be handled elsewhere or passed to the guest). */
2681 static int sh_page_fault(struct vcpu *v,
2682 unsigned long va,
2683 struct cpu_user_regs *regs)
2685 struct domain *d = v->domain;
2686 walk_t gw;
2687 u32 accumulated_gflags;
2688 gfn_t gfn;
2689 mfn_t gmfn, sl1mfn=_mfn(0);
2690 shadow_l1e_t sl1e, *ptr_sl1e;
2691 paddr_t gpa;
2692 struct sh_emulate_ctxt emul_ctxt;
2693 struct x86_emulate_ops *emul_ops;
2694 int r;
2695 fetch_type_t ft = 0;
2696 p2m_type_t p2mt;
2698 SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n",
2699 v->domain->domain_id, v->vcpu_id, va, regs->error_code);
2701 perfc_incr(shadow_fault);
2702 //
2703 // XXX: Need to think about eventually mapping superpages directly in the
2704 // shadow (when possible), as opposed to splintering them into a
2705 // bunch of 4K maps.
2706 //
2708 #if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH) && SHADOW_PAGING_LEVELS > 2
2709 if ( (regs->error_code & PFEC_reserved_bit) )
2711 /* The only reasons for reserved bits to be set in shadow entries
2712 * are the two "magic" shadow_l1e entries. */
2713 if ( likely((__copy_from_user(&sl1e,
2714 (sh_linear_l1_table(v)
2715 + shadow_l1_linear_offset(va)),
2716 sizeof(sl1e)) == 0)
2717 && sh_l1e_is_magic(sl1e)) )
2719 if ( sh_l1e_is_gnp(sl1e) )
2721 /* Not-present in a guest PT: pass to the guest as
2722 * a not-present fault (by flipping two bits). */
2723 ASSERT(regs->error_code & PFEC_page_present);
2724 regs->error_code ^= (PFEC_reserved_bit|PFEC_page_present);
2725 reset_early_unshadow(v);
2726 perfc_incr(shadow_fault_fast_gnp);
2727 SHADOW_PRINTK("fast path not-present\n");
2728 return 0;
2730 else
2732 /* Magic MMIO marker: extract gfn for MMIO address */
2733 ASSERT(sh_l1e_is_mmio(sl1e));
2734 gpa = (((paddr_t)(gfn_x(sh_l1e_mmio_get_gfn(sl1e))))
2735 << PAGE_SHIFT)
2736 | (va & ~PAGE_MASK);
2738 perfc_incr(shadow_fault_fast_mmio);
2739 SHADOW_PRINTK("fast path mmio %#"PRIpaddr"\n", gpa);
2740 reset_early_unshadow(v);
2741 handle_mmio(gpa);
2742 return EXCRET_fault_fixed;
2744 else
2746 /* This should be exceptionally rare: another vcpu has fixed
2747 * the tables between the fault and our reading the l1e.
2748 * Retry and let the hardware give us the right fault next time. */
2749 perfc_incr(shadow_fault_fast_fail);
2750 SHADOW_PRINTK("fast path false alarm!\n");
2751 return EXCRET_fault_fixed;
2754 #endif /* SHOPT_FAST_FAULT_PATH */
2756 /* Detect if this page fault happened while we were already in Xen
2757 * doing a shadow operation. If that happens, the only thing we can
2758 * do is let Xen's normal fault handlers try to fix it. In any case,
2759 * a diagnostic trace of the fault will be more useful than
2760 * a BUG() when we try to take the lock again. */
2761 if ( unlikely(shadow_locked_by_me(d)) )
2763 SHADOW_ERROR("Recursive shadow fault: lock was taken by %s\n",
2764 d->arch.paging.shadow.locker_function);
2765 return 0;
2768 shadow_lock(d);
2770 shadow_audit_tables(v);
2772 if ( guest_walk_tables(v, va, &gw, 1) != 0 )
2774 SHADOW_PRINTK("malformed guest pagetable\n");
2775 print_gw(&gw);
2778 /* It's possible that the guest has put pagetables in memory that it has
2779 * already used for some special purpose (ioreq pages, or granted pages).
2780 * If that happens we'll have killed the guest already but it's still not
2781 * safe to propagate entries out of the guest PT so get out now. */
2782 if ( unlikely(d->is_shutting_down) )
2784 SHADOW_PRINTK("guest is shutting down\n");
2785 shadow_unlock(d);
2786 return 0;
2789 sh_audit_gw(v, &gw);
2791 // We do not look at the gw->l1e, as that will not exist for superpages.
2792 // Instead, we use the gw->eff_l1e...
2793 //
2794 // We need not check all the levels of the guest page table entries for
2795 // present vs not-present, as the eff_l1e will always be not present if
2796 // one of the higher level entries is not present.
2797 //
2798 if ( unlikely(!(guest_l1e_get_flags(gw.eff_l1e) & _PAGE_PRESENT)) )
2800 perfc_incr(shadow_fault_bail_not_present);
2801 goto not_a_shadow_fault;
2804 // All levels of the guest page table are now known to be present.
2805 accumulated_gflags = accumulate_guest_flags(v, &gw);
2807 // Check for attempts to access supervisor-only pages from user mode,
2808 // i.e. ring 3. Such errors are not caused or dealt with by the shadow
2809 // code.
2810 //
2811 if ( (regs->error_code & PFEC_user_mode) &&
2812 !(accumulated_gflags & _PAGE_USER) )
2814 /* illegal user-mode access to supervisor-only page */
2815 perfc_incr(shadow_fault_bail_user_supervisor);
2816 goto not_a_shadow_fault;
2819 // Was it a write fault?
2820 ft = ((regs->error_code & PFEC_write_access)
2821 ? ft_demand_write : ft_demand_read);
2822 if ( ft == ft_demand_write )
2824 if ( unlikely(!(accumulated_gflags & _PAGE_RW)) )
2826 perfc_incr(shadow_fault_bail_ro_mapping);
2827 goto not_a_shadow_fault;
2830 else // must have been either an insn fetch or read fault
2832 // Check for NX bit violations: attempts to execute code that is
2833 // marked "do not execute". Such errors are not caused or dealt with
2834 // by the shadow code.
2835 //
2836 if ( regs->error_code & PFEC_insn_fetch )
2838 if ( accumulated_gflags & _PAGE_NX_BIT )
2840 /* NX prevented this code fetch */
2841 perfc_incr(shadow_fault_bail_nx);
2842 goto not_a_shadow_fault;
2847 /* What mfn is the guest trying to access? */
2848 gfn = guest_l1e_get_gfn(gw.eff_l1e);
2849 gmfn = gfn_to_mfn(d, gfn, &p2mt);
2851 if ( shadow_mode_refcounts(d) &&
2852 (!p2m_is_valid(p2mt) || (!p2m_is_mmio(p2mt) && !mfn_valid(gmfn))) )
2854 perfc_incr(shadow_fault_bail_bad_gfn);
2855 SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n",
2856 gfn_x(gfn), mfn_x(gmfn));
2857 goto not_a_shadow_fault;
2860 /* Make sure there is enough free shadow memory to build a chain of
2861 * shadow tables. (We never allocate a top-level shadow on this path,
2862 * only a 32b l1, pae l1, or 64b l3+2+1. Note that while
2863 * SH_type_l1_shadow isn't correct in the latter case, all page
2864 * tables are the same size there.) */
2865 shadow_prealloc(d,
2866 SH_type_l1_shadow,
2867 GUEST_PAGING_LEVELS < 4 ? 1 : GUEST_PAGING_LEVELS - 1);
2869 /* Acquire the shadow. This must happen before we figure out the rights
2870 * for the shadow entry, since we might promote a page here. */
2871 ptr_sl1e = shadow_get_and_create_l1e(v, &gw, &sl1mfn, ft);
2872 if ( unlikely(ptr_sl1e == NULL) )
2874 /* Couldn't get the sl1e! Since we know the guest entries
2875 * are OK, this can only have been caused by a failed
2876 * shadow_set_l*e(), which will have crashed the guest.
2877 * Get out of the fault handler immediately. */
2878 ASSERT(d->is_shutting_down);
2879 unmap_walk(v, &gw);
2880 shadow_unlock(d);
2881 return 0;
2884 /* Calculate the shadow entry and write it */
2885 l1e_propagate_from_guest(v, (gw.l1e) ? gw.l1e : &gw.eff_l1e, gw.l1mfn,
2886 gmfn, &sl1e, ft, p2mt);
2887 r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
2889 #if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH
2890 /* Prefetch some more shadow entries */
2891 sh_prefetch(v, &gw, ptr_sl1e, sl1mfn);
2892 #endif
2894 /* Need to emulate accesses to page tables */
2895 if ( sh_mfn_is_a_page_table(gmfn) )
2897 if ( ft == ft_demand_write )
2899 perfc_incr(shadow_fault_emulate_write);
2900 goto emulate;
2902 else if ( shadow_mode_trap_reads(d) && ft == ft_demand_read )
2904 perfc_incr(shadow_fault_emulate_read);
2905 goto emulate;
2909 /* Need to hand off device-model MMIO and writes to read-only
2910 * memory to the device model */
2911 if ( p2mt == p2m_mmio_dm
2912 || (p2mt == p2m_ram_ro && ft == ft_demand_write) )
2914 gpa = guest_walk_to_gpa(&gw);
2915 goto mmio;
2918 perfc_incr(shadow_fault_fixed);
2919 d->arch.paging.log_dirty.fault_count++;
2920 reset_early_unshadow(v);
2922 done:
2923 sh_audit_gw(v, &gw);
2924 unmap_walk(v, &gw);
2925 SHADOW_PRINTK("fixed\n");
2926 shadow_audit_tables(v);
2927 shadow_unlock(d);
2928 return EXCRET_fault_fixed;
2930 emulate:
2931 if ( !shadow_mode_refcounts(d) || !guest_mode(regs) )
2932 goto not_a_shadow_fault;
2934 /*
2935 * We do not emulate user writes. Instead we use them as a hint that the
2936 * page is no longer a page table. This behaviour differs from native, but
2937 * it seems very unlikely that any OS grants user access to page tables.
2938 */
2939 if ( (regs->error_code & PFEC_user_mode) )
2941 SHADOW_PRINTK("user-mode fault to PT, unshadowing mfn %#lx\n",
2942 mfn_x(gmfn));
2943 perfc_incr(shadow_fault_emulate_failed);
2944 sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */);
2945 goto done;
2948 if ( is_hvm_domain(d) )
2950 /*
2951 * If we are in the middle of injecting an exception or interrupt then
2952 * we should not emulate: it is not the instruction at %eip that caused
2953 * the fault. Furthermore it is almost certainly the case the handler
2954 * stack is currently considered to be a page table, so we should
2955 * unshadow the faulting page before exiting.
2956 */
2957 if ( unlikely(hvm_event_pending(v)) )
2959 gdprintk(XENLOG_DEBUG, "write to pagetable during event "
2960 "injection: cr2=%#lx, mfn=%#lx\n",
2961 va, mfn_x(gmfn));
2962 sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */);
2963 goto done;
2967 SHADOW_PRINTK("emulate: eip=%#lx esp=%#lx\n",
2968 (unsigned long)regs->eip, (unsigned long)regs->esp);
2970 /*
2971 * We don't need to hold the lock for the whole emulation; we will
2972 * take it again when we write to the pagetables.
2973 */
2974 sh_audit_gw(v, &gw);
2975 unmap_walk(v, &gw);
2976 shadow_audit_tables(v);
2977 shadow_unlock(d);
2979 emul_ops = shadow_init_emulation(&emul_ctxt, regs);
2981 r = x86_emulate(&emul_ctxt.ctxt, emul_ops);
2983 /*
2984 * NB. We do not unshadow on X86EMUL_EXCEPTION. It's not clear that it
2985 * would be a good unshadow hint. If we *do* decide to unshadow-on-fault
2986 * then it must be 'failable': we cannot require the unshadow to succeed.
2987 */
2988 if ( r == X86EMUL_UNHANDLEABLE )
2990 SHADOW_PRINTK("emulator failure, unshadowing mfn %#lx\n",
2991 mfn_x(gmfn));
2992 perfc_incr(shadow_fault_emulate_failed);
2993 /* If this is actually a page table, then we have a bug, and need
2994 * to support more operations in the emulator. More likely,
2995 * though, this is a hint that this page should not be shadowed. */
2996 shadow_remove_all_shadows(v, gmfn);
2999 #if GUEST_PAGING_LEVELS == 3 /* PAE guest */
3000 if ( r == X86EMUL_OKAY ) {
3001 int i;
3002 /* Emulate up to four extra instructions in the hope of catching
3003 * the "second half" of a 64-bit pagetable write. */
3004 for ( i = 0 ; i < 4 ; i++ )
3006 shadow_continue_emulation(&emul_ctxt, regs);
3007 v->arch.paging.last_write_was_pt = 0;
3008 r = x86_emulate(&emul_ctxt.ctxt, emul_ops);
3009 if ( r == X86EMUL_OKAY )
3011 if ( v->arch.paging.last_write_was_pt )
3013 perfc_incr(shadow_em_ex_pt);
3014 break; /* Don't emulate past the other half of the write */
3016 else
3017 perfc_incr(shadow_em_ex_non_pt);
3019 else
3021 perfc_incr(shadow_em_ex_fail);
3022 break; /* Don't emulate again if we failed! */
3026 #endif /* PAE guest */
3028 SHADOW_PRINTK("emulated\n");
3029 return EXCRET_fault_fixed;
3031 mmio:
3032 if ( !guest_mode(regs) )
3033 goto not_a_shadow_fault;
3034 perfc_incr(shadow_fault_mmio);
3035 sh_audit_gw(v, &gw);
3036 unmap_walk(v, &gw);
3037 SHADOW_PRINTK("mmio %#"PRIpaddr"\n", gpa);
3038 shadow_audit_tables(v);
3039 reset_early_unshadow(v);
3040 shadow_unlock(d);
3041 handle_mmio(gpa);
3042 return EXCRET_fault_fixed;
3044 not_a_shadow_fault:
3045 sh_audit_gw(v, &gw);
3046 unmap_walk(v, &gw);
3047 SHADOW_PRINTK("not a shadow fault\n");
3048 shadow_audit_tables(v);
3049 reset_early_unshadow(v);
3050 shadow_unlock(d);
3051 return 0;
3055 static int
3056 sh_invlpg(struct vcpu *v, unsigned long va)
3057 /* Called when the guest requests an invlpg. Returns 1 if the invlpg
3058 * instruction should be issued on the hardware, or 0 if it's safe not
3059 * to do so. */
3061 shadow_l2e_t sl2e;
3063 perfc_incr(shadow_invlpg);
3065 #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
3066 /* No longer safe to use cached gva->gfn translations */
3067 vtlb_flush(v);
3068 #endif
3070 /* First check that we can safely read the shadow l2e. With SMP/PAE
3071 * linux, as many as 6% of invlpg calls can hit an l2 that we haven't
3072 * shadowed yet. */
3073 #if SHADOW_PAGING_LEVELS == 4
3075 shadow_l3e_t sl3e;
3076 if ( !(shadow_l4e_get_flags(
3077 sh_linear_l4_table(v)[shadow_l4_linear_offset(va)])
3078 & _PAGE_PRESENT) )
3079 return 0;
3080 /* This must still be a copy-from-user because we don't have the
3081 * shadow lock, and the higher-level shadows might disappear
3082 * under our feet. */
3083 if ( __copy_from_user(&sl3e, (sh_linear_l3_table(v)
3084 + shadow_l3_linear_offset(va)),
3085 sizeof (sl3e)) != 0 )
3087 perfc_incr(shadow_invlpg_fault);
3088 return 0;
3090 if ( !(shadow_l3e_get_flags(sl3e) & _PAGE_PRESENT) )
3091 return 0;
3093 #elif SHADOW_PAGING_LEVELS == 3
3094 if ( !(l3e_get_flags(v->arch.paging.shadow.l3table[shadow_l3_linear_offset(va)])
3095 & _PAGE_PRESENT) )
3096 // no need to flush anything if there's no SL2...
3097 return 0;
3098 #endif
3100 /* This must still be a copy-from-user because we don't have the shadow
3101 * lock, and the higher-level shadows might disappear under our feet. */
3102 if ( __copy_from_user(&sl2e,
3103 sh_linear_l2_table(v) + shadow_l2_linear_offset(va),
3104 sizeof (sl2e)) != 0 )
3106 perfc_incr(shadow_invlpg_fault);
3107 return 0;
3110 // If there's nothing shadowed for this particular sl2e, then
3111 // there is no need to do an invlpg, either...
3112 //
3113 if ( !(shadow_l2e_get_flags(sl2e) & _PAGE_PRESENT) )
3114 return 0;
3116 // Check to see if the SL2 is a splintered superpage...
3117 // If so, then we'll need to flush the entire TLB (because that's
3118 // easier than invalidating all of the individual 4K pages).
3119 //
3120 if ( mfn_to_shadow_page(shadow_l2e_get_mfn(sl2e))->type
3121 == SH_type_fl1_shadow )
3123 flush_tlb_local();
3124 return 0;
3127 return 1;
3131 static unsigned long
3132 sh_gva_to_gfn(struct vcpu *v, unsigned long va)
3133 /* Called to translate a guest virtual address to what the *guest*
3134 * pagetables would map it to. */
3136 walk_t gw;
3137 gfn_t gfn;
3139 #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
3140 struct shadow_vtlb t = {0};
3141 if ( vtlb_lookup(v, va, &t) )
3142 return t.frame_number;
3143 #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
3145 guest_walk_tables(v, va, &gw, 0);
3146 gfn = guest_walk_to_gfn(&gw);
3148 #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
3149 t.page_number = va >> PAGE_SHIFT;
3150 t.frame_number = gfn_x(gfn);
3151 t.flags = accumulate_guest_flags(v, &gw);
3152 vtlb_insert(v, t);
3153 #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
3155 unmap_walk(v, &gw);
3156 return gfn_x(gfn);
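/* Note on the virtual TLB above: vtlb_insert() caches the va->gfn result
 * keyed by page number, so a later sh_gva_to_gfn() on the same page can
 * return t.frame_number without re-walking the guest pagetables (the cache
 * is flushed in sh_invlpg() and on other invalidations). */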
3160 static inline void
3161 sh_update_linear_entries(struct vcpu *v)
3162 /* Sync up all the linear mappings for this vcpu's pagetables */
3164 struct domain *d = v->domain;
3166 /* Linear pagetables in PV guests
3167 * ------------------------------
3169 * Guest linear pagetables, which map the guest pages, are at
3170 * LINEAR_PT_VIRT_START. Shadow linear pagetables, which map the
3171 * shadows, are at SH_LINEAR_PT_VIRT_START. Most of the time these
3172 * are set up at shadow creation time, but (of course!) the PAE case
3173 * is subtler. Normal linear mappings are made by having an entry
3174 * in the top-level table that points to itself (shadow linear) or
3175 * to the guest top-level table (guest linear). For PAE, to set up
3176 * a linear map requires us to copy the four top-level entries into
3177 * level-2 entries. That means that every time we change a PAE l3e,
3178 * we need to reflect the change into the copy.
3180 * Linear pagetables in HVM guests
3181 * -------------------------------
3183 * For HVM guests, the linear pagetables are installed in the monitor
3184 * tables (since we can't put them in the shadow). Shadow linear
3185 * pagetables, which map the shadows, are at SH_LINEAR_PT_VIRT_START,
3186 * and we use the linear pagetable slot at LINEAR_PT_VIRT_START for
3187 * a linear pagetable of the monitor tables themselves. We have
3188 * the same issue of having to re-copy PAE l3 entries whenever we use
3189 * PAE shadows.
3191 * Because HVM guests run on the same monitor tables regardless of the
3192 * shadow tables in use, the linear mapping of the shadow tables has to
3193 * be updated every time v->arch.shadow_table changes.
3194 */
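/* Illustrative reminder of what a "linear map" is here: a top-level shadow
 * entry that points back at a pagetable page, in the way that
 *     sl2e[shadow_l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
 *         shadow_l2e_from_mfn(sl2mfn, __PAGE_HYPERVISOR);
 * is installed earlier in this file; with such an entry present, the
 * pagetable pages themselves become readable through the
 * (SH_)LINEAR_PT_VIRT_START windows without any explicit mapping calls. */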
3196 /* Don't try to update the monitor table if it doesn't exist */
3197 if ( shadow_mode_external(d)
3198 && pagetable_get_pfn(v->arch.monitor_table) == 0 )
3199 return;
3201 #if (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS == 4)
3203 /* For PV, one l4e points at the guest l4, one points at the shadow
3204 * l4. No maintenance required.
3205 * For HVM, just need to update the l4e that points to the shadow l4. */
3207 if ( shadow_mode_external(d) )
3209 /* Use the linear map if we can; otherwise make a new mapping */
3210 if ( v == current )
3212 __linear_l4_table[l4_linear_offset(SH_LINEAR_PT_VIRT_START)] =
3213 l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
3214 __PAGE_HYPERVISOR);
3216 else
3218 l4_pgentry_t *ml4e;
3219 ml4e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
3220 ml4e[l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
3221 l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
3222 __PAGE_HYPERVISOR);
3223 sh_unmap_domain_page(ml4e);
3227 #elif (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS == 3)
3229 /* PV: XXX
3231 * HVM: To give ourselves a linear map of the shadows, we need to
3232 * extend a PAE shadow to 4 levels. We do this by having a monitor
3233 * l3 in slot 0 of the monitor l4 table, and copying the PAE l3
3234 * entries into it. Then, by having the monitor l4e for shadow
3235 * pagetables also point to the monitor l4, we can use it to access
3236 * the shadows.
3237 */
3239 if ( shadow_mode_external(d) )
3241 /* Install copies of the shadow l3es into the monitor l3 table.
3242 * The monitor l3 table is hooked into slot 0 of the monitor
3243 * l4 table, so we use l3 linear indices 0 to 3 */
3244 shadow_l3e_t *sl3e;
3245 l3_pgentry_t *ml3e;
3246 mfn_t l3mfn;
3247 int i;
3249 /* Use linear mappings if we can; otherwise make new mappings */
3250 if ( v == current )
3252 ml3e = __linear_l3_table;
3253 l3mfn = _mfn(l4e_get_pfn(__linear_l4_table[0]));
3255 else
3257 l4_pgentry_t *ml4e;
3258 ml4e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
3259 ASSERT(l4e_get_flags(ml4e[0]) & _PAGE_PRESENT);
3260 l3mfn = _mfn(l4e_get_pfn(ml4e[0]));
3261 ml3e = sh_map_domain_page(l3mfn);
3262 sh_unmap_domain_page(ml4e);
3265 /* Shadow l3 tables are made up by sh_update_cr3 */
3266 sl3e = v->arch.paging.shadow.l3table;
3268 for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
3270 ml3e[i] =
3271 (shadow_l3e_get_flags(sl3e[i]) & _PAGE_PRESENT)
3272 ? l3e_from_pfn(mfn_x(shadow_l3e_get_mfn(sl3e[i])),
3273 __PAGE_HYPERVISOR)
3274 : l3e_empty();
3277 if ( v != current )
3278 sh_unmap_domain_page(ml3e);
3280 else
3281 domain_crash(d); /* XXX */
3283 #elif CONFIG_PAGING_LEVELS == 3
3285 /* PV: need to copy the guest's l3 entries into the guest-linear-map l2
3286 * entries in the shadow, and the shadow's l3 entries into the
3287 * shadow-linear-map l2 entries in the shadow. This is safe to do
3288 * because Xen does not let guests share high-slot l2 tables between l3s,
3289 * so we know we're not treading on anyone's toes.
3291 * HVM: need to copy the shadow's l3 entries into the
3292 * shadow-linear-map l2 entries in the monitor table. This is safe
3293 * because we have one monitor table for each vcpu. The monitor's
3294 * own l3es don't need to be copied because they never change.
3295 * XXX That might change if we start stuffing things into the rest
3296 * of the monitor's virtual address space.
3297 */
3299 l2_pgentry_t *l2e, new_l2e;
3300 shadow_l3e_t *guest_l3e = NULL, *shadow_l3e;
3301 int i;
3302 int unmap_l2e = 0;
3304 #if GUEST_PAGING_LEVELS == 2
3306 /* Shadow l3 tables were built by sh_update_cr3 */
3307 BUG_ON(!shadow_mode_external(d)); /* PV 2-on-3 is unsupported */
3308 shadow_l3e = (shadow_l3e_t *)&v->arch.paging.shadow.l3table;
3310 #else /* GUEST_PAGING_LEVELS == 3 */
3312 shadow_l3e = (shadow_l3e_t *)&v->arch.paging.shadow.l3table;
3313 guest_l3e = (guest_l3e_t *)&v->arch.paging.shadow.gl3e;
3315 #endif /* GUEST_PAGING_LEVELS */
3317 /* Choose where to write the entries, using linear maps if possible */
3318 if ( shadow_mode_external(d) )
3320 if ( v == current )
3322 /* From the monitor tables, it's safe to use linear maps
3323 * to update monitor l2s */
3324 l2e = __linear_l2_table + (3 * L2_PAGETABLE_ENTRIES);
3326 else
3328 /* Map the monitor table's high l2 */
3329 l3_pgentry_t *l3e;
3330 l3e = sh_map_domain_page(
3331 pagetable_get_mfn(v->arch.monitor_table));
3332 ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
3333 l2e = sh_map_domain_page(_mfn(l3e_get_pfn(l3e[3])));
3334 unmap_l2e = 1;
3335 sh_unmap_domain_page(l3e);
3338 else
3340 /* Map the shadow table's high l2 */
3341 ASSERT(shadow_l3e_get_flags(shadow_l3e[3]) & _PAGE_PRESENT);
3342 l2e = sh_map_domain_page(shadow_l3e_get_mfn(shadow_l3e[3]));
3343 unmap_l2e = 1;
3346 /* Write linear mapping of guest (only in PV, and only when
3347 * not translated). */
3348 if ( !shadow_mode_translate(d) )
3350 for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
3352 new_l2e =
3353 ((shadow_l3e_get_flags(guest_l3e[i]) & _PAGE_PRESENT)
3354 ? l2e_from_pfn(mfn_x(shadow_l3e_get_mfn(guest_l3e[i])),
3355 __PAGE_HYPERVISOR)
3356 : l2e_empty());
3357 safe_write_entry(
3358 &l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i],
3359 &new_l2e);
3363 /* Write linear mapping of shadow. */
3364 for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
3366 new_l2e = (shadow_l3e_get_flags(shadow_l3e[i]) & _PAGE_PRESENT)
3367 ? l2e_from_pfn(mfn_x(shadow_l3e_get_mfn(shadow_l3e[i])),
3368 __PAGE_HYPERVISOR)
3369 : l2e_empty();
3370 safe_write_entry(
3371 &l2e[l2_table_offset(SH_LINEAR_PT_VIRT_START) + i],
3372 &new_l2e);
3375 if ( unmap_l2e )
3376 sh_unmap_domain_page(l2e);
3379 #elif CONFIG_PAGING_LEVELS == 2
3381 /* For PV, one l2e points at the guest l2, one points at the shadow
3382 * l2. No maintenance required.
3383 * For HVM, just need to update the l2e that points to the shadow l2. */
3385 if ( shadow_mode_external(d) )
3387 /* Use the linear map if we can; otherwise make a new mapping */
3388 if ( v == current )
3390 __linear_l2_table[l2_linear_offset(SH_LINEAR_PT_VIRT_START)] =
3391 l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
3392 __PAGE_HYPERVISOR);
3394 else
3396 l2_pgentry_t *ml2e;
3397 ml2e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
3398 ml2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
3399 l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table[0]),
3400 __PAGE_HYPERVISOR);
3401 sh_unmap_domain_page(ml2e);
3405 #else
3406 #error this should not happen
3407 #endif
3409 if ( shadow_mode_external(d) )
3411 /*
3412 * Having modified the linear pagetable mapping, flush local host TLBs.
3413 * This was not needed when vmenter/vmexit always had the side effect
3414 * of flushing host TLBs but, with ASIDs, it is possible to finish
3415 * this CR3 update, vmenter the guest, vmexit due to a page fault,
3416 * without an intervening host TLB flush. Then the page fault code
3417 * could use the linear pagetable to read a top-level shadow page
3418 * table entry. But, without this change, it would fetch the wrong
3419 * value due to a stale TLB.
3420 */
3421 flush_tlb_local();
3426 /* Removes vcpu->arch.paging.shadow.guest_vtable and vcpu->arch.shadow_table[].
3427 * Does all appropriate management/bookkeeping/refcounting/etc...
3428 */
3429 static void
3430 sh_detach_old_tables(struct vcpu *v)
3432 mfn_t smfn;
3433 int i = 0;
3435 ////
3436 //// vcpu->arch.paging.shadow.guest_vtable
3437 ////
3439 #if GUEST_PAGING_LEVELS == 3
3440 /* PAE guests don't have a mapping of the guest top-level table */
3441 ASSERT(v->arch.paging.shadow.guest_vtable == NULL);
3442 #else
3443 if ( v->arch.paging.shadow.guest_vtable )
3445 struct domain *d = v->domain;
3446 if ( shadow_mode_external(d) || shadow_mode_translate(d) )
3447 sh_unmap_domain_page_global(v->arch.paging.shadow.guest_vtable);
3448 v->arch.paging.shadow.guest_vtable = NULL;
3450 #endif
3453 ////
3454 //// vcpu->arch.shadow_table[]
3455 ////
3457 #if GUEST_PAGING_LEVELS == 3
3458 /* PAE guests have four shadow_table entries */
3459 for ( i = 0 ; i < 4 ; i++ )
3460 #endif
3462 smfn = pagetable_get_mfn(v->arch.shadow_table[i]);
3463 if ( mfn_x(smfn) )
3464 sh_put_ref(v, smfn, 0);
3465 v->arch.shadow_table[i] = pagetable_null();
3469 /* Set up the top-level shadow and install it in slot 'slot' of shadow_table */
3470 static void
3471 sh_set_toplevel_shadow(struct vcpu *v,
3472 int slot,
3473 mfn_t gmfn,
3474 unsigned int root_type)
3476 mfn_t smfn;
3477 pagetable_t old_entry, new_entry;
3479 struct domain *d = v->domain;
3481 /* Remember the old contents of this slot */
3482 old_entry = v->arch.shadow_table[slot];
3484 /* Now figure out the new contents: is this a valid guest MFN? */
3485 if ( !mfn_valid(gmfn) )
3487 new_entry = pagetable_null();
3488 goto install_new_entry;
3491 /* Guest mfn is valid: shadow it and install the shadow */
3492 smfn = get_shadow_status(v, gmfn, root_type);
3493 if ( !mfn_valid(smfn) )
3495 /* Make sure there's enough free shadow memory. */
3496 shadow_prealloc(d, root_type, 1);
3497 /* Shadow the page. */
3498 smfn = sh_make_shadow(v, gmfn, root_type);
3500 ASSERT(mfn_valid(smfn));
3502 #if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
3503 /* Once again OK to unhook entries from this table if we see fork/exit */
3504 ASSERT(sh_mfn_is_a_page_table(gmfn));
3505 mfn_to_page(gmfn)->shadow_flags &= ~SHF_unhooked_mappings;
3506 #endif
3508 /* Pin the shadow and put it (back) on the list of pinned shadows */
3509 if ( sh_pin(v, smfn) == 0 )
3511 SHADOW_ERROR("can't pin %#lx as toplevel shadow\n", mfn_x(smfn));
3512 domain_crash(v->domain);
3515 /* Take a ref to this page: it will be released in sh_detach_old_tables()
3516 * or by the next call to sh_set_toplevel_shadow() for this slot. */
3517 if ( !sh_get_ref(v, smfn, 0) )
3519 SHADOW_ERROR("can't install %#lx as toplevel shadow\n", mfn_x(smfn));
3520 domain_crash(v->domain);
3523 new_entry = pagetable_from_mfn(smfn);
3525 install_new_entry:
3526 /* Done. Install it */
3527 SHADOW_PRINTK("%u/%u [%u] gmfn %#"PRI_mfn" smfn %#"PRI_mfn"\n",
3528 GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS, slot,
3529 mfn_x(gmfn), mfn_x(pagetable_get_mfn(new_entry)));
3530 v->arch.shadow_table[slot] = new_entry;
3532 /* Decrement the refcount of the old contents of this slot */
3533 if ( !pagetable_is_null(old_entry) )
3534 sh_put_ref(v, pagetable_get_mfn(old_entry), 0);
3538 static void
3539 sh_update_cr3(struct vcpu *v, int do_locking)
3540 /* Updates vcpu->arch.cr3 after the guest has changed CR3.
3541 * Paravirtual guests should set v->arch.guest_table (and guest_table_user,
3542 * if appropriate).
3543 * HVM guests should also make sure hvm_get_guest_cntl_reg(v, 3) works;
3544 * this function will call hvm_update_guest_cr(v, 3) to tell them where the
3545 * shadow tables are.
3546 * If do_locking != 0, assume we are being called from outside the
3547 * shadow code, and must take and release the shadow lock; otherwise
3548 * that is the caller's responsibility.
3549 */
3551 struct domain *d = v->domain;
3552 mfn_t gmfn;
3553 #if GUEST_PAGING_LEVELS == 3
3554 guest_l3e_t *gl3e;
3555 u32 guest_idx=0;
3556 int i;
3557 #endif
3559 /* Don't do anything on an uninitialised vcpu */
3560 if ( !is_hvm_domain(d) && !v->is_initialised )
3562 ASSERT(v->arch.cr3 == 0);
3563 return;
3566 if ( do_locking ) shadow_lock(v->domain);
3568 ASSERT(shadow_locked_by_me(v->domain));
3569 ASSERT(v->arch.paging.mode);
3571 ////
3572 //// vcpu->arch.guest_table is already set
3573 ////
3575 #ifndef NDEBUG
3576 /* Double-check that the HVM code has sent us a sane guest_table */
3577 if ( is_hvm_domain(d) )
3579 ASSERT(shadow_mode_external(d));
3580 if ( hvm_paging_enabled(v) )
3581 ASSERT(pagetable_get_pfn(v->arch.guest_table));
3582 else
3583 ASSERT(v->arch.guest_table.pfn
3584 == d->arch.paging.shadow.unpaged_pagetable.pfn);
3586 #endif
3588 SHADOW_PRINTK("d=%u v=%u guest_table=%05lx\n",
3589 d->domain_id, v->vcpu_id,
3590 (unsigned long)pagetable_get_pfn(v->arch.guest_table));
3592 #if GUEST_PAGING_LEVELS == 4
3593 if ( !(v->arch.flags & TF_kernel_mode) && !is_pv_32on64_vcpu(v) )
3594 gmfn = pagetable_get_mfn(v->arch.guest_table_user);
3595 else
3596 #endif
3597 gmfn = pagetable_get_mfn(v->arch.guest_table);
3600 ////
3601 //// vcpu->arch.paging.shadow.guest_vtable
3602 ////
3603 #if GUEST_PAGING_LEVELS == 4
3604 if ( shadow_mode_external(d) || shadow_mode_translate(d) )
3606 if ( v->arch.paging.shadow.guest_vtable )
3607 sh_unmap_domain_page_global(v->arch.paging.shadow.guest_vtable);
3608 v->arch.paging.shadow.guest_vtable = sh_map_domain_page_global(gmfn);
3609 /* PAGING_LEVELS==4 implies 64-bit, which means that
3610 * map_domain_page_global can't fail */
3611 BUG_ON(v->arch.paging.shadow.guest_vtable == NULL);
3613 else
3614 v->arch.paging.shadow.guest_vtable = __linear_l4_table;
3615 #elif GUEST_PAGING_LEVELS == 3
3616 /* On PAE guests we don't use a mapping of the guest's own top-level
3617 * table. We cache the current state of that table and shadow that,
3618 * until the next CR3 write makes us refresh our cache. */
3619 ASSERT(v->arch.paging.shadow.guest_vtable == NULL);
3621 if ( shadow_mode_external(d) )
3622 /* Find where in the page the l3 table is */
3623 guest_idx = guest_index((void *)v->arch.hvm_vcpu.guest_cr[3]);
3624 else
3625 /* PV guest: l3 is at the start of a page */
3626 guest_idx = 0;
3628 // Ignore the low 2 bits of guest_idx -- they are really just
3629 // cache control.
3630 guest_idx &= ~3;
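/* Worked example (assuming guest_index() is the byte offset within the
 * page divided by the 8-byte entry size): PAE CR3 keeps the PWT/PCD
 * cache-control bits in bits 3 and 4, which end up in the low two bits
 * of the index.  A guest CR3 pointing at byte offset 0xe18 (PWT and
 * PCD both set) gives guest_idx == 0x1c3; masking with ~3 yields 0x1c0,
 * the start of the 32-byte-aligned group of four l3 entries at offset
 * 0xe00. */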
3632 gl3e = ((guest_l3e_t *)sh_map_domain_page(gmfn)) + guest_idx;
3633 for ( i = 0; i < 4 ; i++ )
3634 v->arch.paging.shadow.gl3e[i] = gl3e[i];
3635 sh_unmap_domain_page(gl3e);
3636 #elif GUEST_PAGING_LEVELS == 2
3637 if ( shadow_mode_external(d) || shadow_mode_translate(d) )
3639 if ( v->arch.paging.shadow.guest_vtable )
3640 sh_unmap_domain_page_global(v->arch.paging.shadow.guest_vtable);
3641 v->arch.paging.shadow.guest_vtable = sh_map_domain_page_global(gmfn);
3642 /* Does this really need map_domain_page_global? Handle the
3643 * error properly if so. */
3644 BUG_ON(v->arch.paging.shadow.guest_vtable == NULL); /* XXX */
3646 else
3647 v->arch.paging.shadow.guest_vtable = __linear_l2_table;
3648 #else
3649 #error this should never happen
3650 #endif
3652 #if 0
3653 printk("%s %s %d gmfn=%05lx shadow.guest_vtable=%p\n",
3654 __func__, __FILE__, __LINE__, gmfn, v->arch.paging.shadow.guest_vtable);
3655 #endif
3657 ////
3658 //// vcpu->arch.shadow_table[]
3659 ////
3661 /* We revoke write access to the new guest toplevel page(s) before we
3662 * replace the old shadow pagetable(s), so that we can safely use the
3663 * (old) shadow linear maps in the writeable mapping heuristics. */
3664 #if GUEST_PAGING_LEVELS == 2
3665 if ( sh_remove_write_access(v, gmfn, 2, 0) != 0 )
3666 flush_tlb_mask(v->domain->domain_dirty_cpumask);
3667 sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow);
3668 #elif GUEST_PAGING_LEVELS == 3
3669 /* PAE guests have four shadow_table entries, based on the
3670 * current values of the guest's four l3es. */
3672 int flush = 0;
3673 gfn_t gl2gfn;
3674 mfn_t gl2mfn;
3675 p2m_type_t p2mt;
3676 guest_l3e_t *gl3e = (guest_l3e_t*)&v->arch.paging.shadow.gl3e;
3677 /* First, make all four entries read-only. */
3678 for ( i = 0; i < 4; i++ )
3680 if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
3682 gl2gfn = guest_l3e_get_gfn(gl3e[i]);
3683 gl2mfn = gfn_to_mfn(d, gl2gfn, &p2mt);
3684 if ( p2m_is_ram(p2mt) )
3685 flush |= sh_remove_write_access(v, gl2mfn, 2, 0);
3688 if ( flush )
3689 flush_tlb_mask(v->domain->domain_dirty_cpumask);
3690 /* Now install the new shadows. */
3691 for ( i = 0; i < 4; i++ )
3693 if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
3695 gl2gfn = guest_l3e_get_gfn(gl3e[i]);
3696 gl2mfn = gfn_to_mfn(d, gl2gfn, &p2mt);
3697 if ( p2m_is_ram(p2mt) )
3698 sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3)
3699 ? SH_type_l2h_shadow
3700 : SH_type_l2_shadow);
3701 else
3702 sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0);
3704 else
3705 sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0);
3708 #elif GUEST_PAGING_LEVELS == 4
3709 if ( sh_remove_write_access(v, gmfn, 4, 0) != 0 )
3710 flush_tlb_mask(v->domain->domain_dirty_cpumask);
3711 sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow);
3712 #else
3713 #error This should never happen
3714 #endif
3719 ///
3720 /// v->arch.paging.shadow.l3table
3721 ///
3722 #if SHADOW_PAGING_LEVELS == 3
3724 mfn_t smfn;
3725 int i;
3726 for ( i = 0; i < 4; i++ )
3728 #if GUEST_PAGING_LEVELS == 2
3729 /* 2-on-3: make a PAE l3 that points at the four-page l2 */
3730 smfn = _mfn(pagetable_get_pfn(v->arch.shadow_table[0]) + i);
3731 #else
3732 /* 3-on-3: make a PAE l3 that points at the four l2 pages */
3733 smfn = pagetable_get_mfn(v->arch.shadow_table[i]);
3734 #endif
3735 v->arch.paging.shadow.l3table[i] =
3736 (mfn_x(smfn) == 0)
3737 ? shadow_l3e_empty()
3738 : shadow_l3e_from_mfn(smfn, _PAGE_PRESENT);
3741 #endif /* SHADOW_PAGING_LEVELS == 3 */
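/* Note on the 2-on-3 case above: it relies on the shadow of a 2-level
 * guest l2 being allocated as four physically contiguous pages, one
 * PAE l2 page per 1GB quarter of the 4GB guest space, which is why
 * adding i to the pfn of shadow_table[0] picks out the i-th l2 page.
 * The contiguity is an allocator property assumed here, not something
 * this loop enforces. */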
3744 ///
3745 /// v->arch.cr3
3746 ///
3747 if ( shadow_mode_external(d) )
3749 make_cr3(v, pagetable_get_pfn(v->arch.monitor_table));
3751 else // not shadow_mode_external...
3753 /* We don't support PV except guest == shadow == config levels */
3754 BUG_ON(GUEST_PAGING_LEVELS != SHADOW_PAGING_LEVELS);
3755 #if SHADOW_PAGING_LEVELS == 3
3756 /* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated.
3757 * Don't use make_cr3: it takes a pfn, but this table is not necessarily
3758 * page-aligned; since it is known to be below 4GB, its maddr can be loaded directly. */
3759 ASSERT(virt_to_maddr(&v->arch.paging.shadow.l3table) <= 0xffffffe0ULL);
3760 v->arch.cr3 = virt_to_maddr(&v->arch.paging.shadow.l3table);
3761 #else
3762 /* 2-on-2 or 4-on-4: Just use the shadow top-level directly */
3763 make_cr3(v, pagetable_get_pfn(v->arch.shadow_table[0]));
3764 #endif
3768 ///
3769 /// v->arch.hvm_vcpu.hw_cr[3]
3770 ///
3771 if ( shadow_mode_external(d) )
3773 ASSERT(is_hvm_domain(d));
3774 #if SHADOW_PAGING_LEVELS == 3
3775 /* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated */
3776 v->arch.hvm_vcpu.hw_cr[3] =
3777 virt_to_maddr(&v->arch.paging.shadow.l3table);
3778 #else
3779 /* 2-on-2 or 4-on-4: Just use the shadow top-level directly */
3780 v->arch.hvm_vcpu.hw_cr[3] =
3781 pagetable_get_paddr(v->arch.shadow_table[0]);
3782 #endif
3783 hvm_update_guest_cr(v, 3);
3786 /* Fix up the linear pagetable mappings */
3787 sh_update_linear_entries(v);
3789 #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
3790 /* No longer safe to use cached gva->gfn translations */
3791 vtlb_flush(v);
3792 #endif
3794 /* Release the lock, if we took it (otherwise it's the caller's problem) */
3795 if ( do_locking ) shadow_unlock(v->domain);
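/* Sketch of the do_locking contract described at the top of this
 * function; the call sites below are hypothetical and shown only for
 * illustration. */
#if 0
    /* From outside the shadow code, with no shadow lock held: */
    sh_update_cr3(v, 1);

    /* From inside the shadow code, with the lock already held: */
    ASSERT(shadow_locked_by_me(v->domain));
    sh_update_cr3(v, 0);
#endif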
3799 /**************************************************************************/
3800 /* Functions to revoke guest rights */
3802 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
3803 static int sh_guess_wrmap(struct vcpu *v, unsigned long vaddr, mfn_t gmfn)
3804 /* Look up this vaddr in the current shadow and see if it's a writeable
3805 * mapping of this gmfn. If so, remove it. Returns 1 if it worked. */
3807 shadow_l1e_t sl1e, *sl1p;
3808 shadow_l2e_t *sl2p;
3809 #if SHADOW_PAGING_LEVELS >= 3
3810 shadow_l3e_t *sl3p;
3811 #if SHADOW_PAGING_LEVELS >= 4
3812 shadow_l4e_t *sl4p;
3813 #endif
3814 #endif
3815 mfn_t sl1mfn;
3816 int r;
3818 /* Carefully look in the shadow linear map for the l1e we expect */
3819 #if SHADOW_PAGING_LEVELS >= 4
3820 sl4p = sh_linear_l4_table(v) + shadow_l4_linear_offset(vaddr);
3821 if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) )
3822 return 0;
3823 sl3p = sh_linear_l3_table(v) + shadow_l3_linear_offset(vaddr);
3824 if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) )
3825 return 0;
3826 #elif SHADOW_PAGING_LEVELS == 3
3827 sl3p = ((shadow_l3e_t *) v->arch.paging.shadow.l3table)
3828 + shadow_l3_linear_offset(vaddr);
3829 if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) )
3830 return 0;
3831 #endif
3832 sl2p = sh_linear_l2_table(v) + shadow_l2_linear_offset(vaddr);
3833 if ( !(shadow_l2e_get_flags(*sl2p) & _PAGE_PRESENT) )
3834 return 0;
3835 sl1p = sh_linear_l1_table(v) + shadow_l1_linear_offset(vaddr);
3836 sl1e = *sl1p;
3837 if ( ((shadow_l1e_get_flags(sl1e) & (_PAGE_PRESENT|_PAGE_RW))
3838 != (_PAGE_PRESENT|_PAGE_RW))
3839 || (mfn_x(shadow_l1e_get_mfn(sl1e)) != mfn_x(gmfn)) )
3840 return 0;
3842 /* Found it! Need to remove its write permissions. */
3843 sl1mfn = shadow_l2e_get_mfn(*sl2p);
3844 sl1e = shadow_l1e_remove_flags(sl1e, _PAGE_RW);
3845 r = shadow_set_l1e(v, sl1p, sl1e, sl1mfn);
3846 ASSERT( !(r & SHADOW_SET_ERROR) );
3847 return 1;
3849 #endif
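/* Sketch of how the guess is meant to be used (hypothetical caller,
 * shown only for illustration): try the cheap single-l1e lookup first,
 * and fall back to a full scan of l1 shadows only when it misses. */
#if 0
    if ( sh_guess_wrmap(v, vaddr, gmfn) )
        return;  /* the writeable mapping has already been shot down */
    /* ...otherwise fall back to scanning l1 shadows with
     * sh_rm_write_access_from_l1() below, which is much more costly... */
#endif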
3851 int sh_rm_write_access_from_l1(struct vcpu *v, mfn_t sl1mfn,
3852 mfn_t readonly_mfn)
3853 /* Excises all writeable mappings to readonly_mfn from this l1 shadow table */
3855 shadow_l1e_t *sl1e;
3856 int done = 0;
3857 int flags;
3858 mfn_t base_sl1mfn = sl1mfn; /* Because sl1mfn changes in the foreach */
3860 SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done,
3862 flags = shadow_l1e_get_flags(*sl1e);
3863 if ( (flags & _PAGE_PRESENT)
3864 && (flags & _PAGE_RW)
3865 && (mfn_x(shadow_l1e_get_mfn(*sl1e)) == mfn_x(readonly_mfn)) )
3867 shadow_l1e_t ro_sl1e = shadow_l1e_remove_flags(*sl1e, _PAGE_RW);
3868 (void) shadow_set_l1e(v, sl1e, ro_sl1e, sl1mfn);
3869 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
3870 /* Remember the last shadow that we shot a writeable mapping in */
3871 v->arch.paging.shadow.last_writeable_pte_smfn = mfn_x(base_sl1mfn);
3872 #endif
3873 if ( (mfn_to_page(readonly_mfn)->u.inuse.type_info
3874 & PGT_count_mask) == 0 )
3875 /* This breaks us cleanly out of the FOREACH macro */
3876 done = 1;
3878 });
3879 return done;
3883 int sh_rm_mappings_from_l1(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn)
3884 /* Excises all mappings of the given guest frame from this shadow l1 table */
3886 shadow_l1e_t *sl1e;
3887 int done = 0;
3888 int flags;
3890 SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done,
3892 flags = shadow_l1e_get_flags(*sl1e);
3893 if ( (flags & _PAGE_PRESENT)
3894 && (mfn_x(shadow_l1e_get_mfn(*sl1e)) == mfn_x(target_mfn)) )
3896 (void) shadow_set_l1e(v, sl1e, shadow_l1e_empty(), sl1mfn);
3897 if ( (mfn_to_page(target_mfn)->count_info & PGC_count_mask) == 0 )
3898 /* This breaks us cleanly out of the FOREACH macro */
3899 done = 1;
3901 });
3902 return done;
3905 /**************************************************************************/
3906 /* Functions to excise all pointers to shadows from higher-level shadows. */
3908 void sh_clear_shadow_entry(struct vcpu *v, void *ep, mfn_t smfn)
3909 /* Blank out a single shadow entry */
3911 switch ( mfn_to_shadow_page(smfn)->type )
3913 case SH_type_l1_shadow:
3914 (void) shadow_set_l1e(v, ep, shadow_l1e_empty(), smfn); break;
3915 case SH_type_l2_shadow:
3916 #if GUEST_PAGING_LEVELS >= 3
3917 case SH_type_l2h_shadow:
3918 #endif
3919 (void) shadow_set_l2e(v, ep, shadow_l2e_empty(), smfn); break;
3920 #if GUEST_PAGING_LEVELS >= 4
3921 case SH_type_l3_shadow:
3922 (void) shadow_set_l3e(v, ep, shadow_l3e_empty(), smfn); break;
3923 case SH_type_l4_shadow:
3924 (void) shadow_set_l4e(v, ep, shadow_l4e_empty(), smfn); break;
3925 #endif
3926 default: BUG(); /* Called with the wrong kind of shadow. */
3930 int sh_remove_l1_shadow(struct vcpu *v, mfn_t sl2mfn, mfn_t sl1mfn)
3931 /* Remove all mappings of this l1 shadow from this l2 shadow */
3933 shadow_l2e_t *sl2e;
3934 int done = 0;
3935 int flags;
3937 SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, done, v->domain,
3939 flags = shadow_l2e_get_flags(*sl2e);
3940 if ( (flags & _PAGE_PRESENT)
3941 && (mfn_x(shadow_l2e_get_mfn(*sl2e)) == mfn_x(sl1mfn)) )
3943 (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
3944 if ( mfn_to_shadow_page(sl1mfn)->type == 0 )
3945 /* This breaks us cleanly out of the FOREACH macro */
3946 done = 1;
3948 });
3949 return done;
3952 #if GUEST_PAGING_LEVELS >= 4
3953 int sh_remove_l2_shadow(struct vcpu *v, mfn_t sl3mfn, mfn_t sl2mfn)
3954 /* Remove all mappings of this l2 shadow from this l3 shadow */
3956 shadow_l3e_t *sl3e;
3957 int done = 0;
3958 int flags;
3960 SHADOW_FOREACH_L3E(sl3mfn, sl3e, 0, done,
3962 flags = shadow_l3e_get_flags(*sl3e);
3963 if ( (flags & _PAGE_PRESENT)
3964 && (mfn_x(shadow_l3e_get_mfn(*sl3e)) == mfn_x(sl2mfn)) )
3966 (void) shadow_set_l3e(v, sl3e, shadow_l3e_empty(), sl3mfn);
3967 if ( mfn_to_shadow_page(sl2mfn)->type == 0 )
3968 /* This breaks us cleanly out of the FOREACH macro */
3969 done = 1;
3971 });
3972 return done;
3975 int sh_remove_l3_shadow(struct vcpu *v, mfn_t sl4mfn, mfn_t sl3mfn)
3976 /* Remove all mappings of this l3 shadow from this l4 shadow */
3978 shadow_l4e_t *sl4e;
3979 int done = 0;
3980 int flags;
3982 SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, done, v->domain,
3984 flags = shadow_l4e_get_flags(*sl4e);
3985 if ( (flags & _PAGE_PRESENT)
3986 && (mfn_x(shadow_l4e_get_mfn(*sl4e)) == mfn_x(sl3mfn)) )
3988 (void) shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn);
3989 if ( mfn_to_shadow_page(sl3mfn)->type == 0 )
3990 /* This breaks us cleanly out of the FOREACH macro */
3991 done = 1;
3993 });
3994 return done;
3996 #endif /* 64bit guest */
3998 /**************************************************************************/
3999 /* Handling HVM guest writes to pagetables */
4001 /* Check that the user is allowed to perform this write.
4002 * Returns a mapped pointer to write to, and the mfn it's on,
4003 * or NULL for error. */
4004 static inline void * emulate_map_dest(struct vcpu *v,
4005 unsigned long vaddr,
4006 struct sh_emulate_ctxt *sh_ctxt,
4007 mfn_t *mfnp)
4009 walk_t gw;
4010 u32 flags, errcode;
4011 gfn_t gfn;
4012 mfn_t mfn;
4013 p2m_type_t p2mt;
4015 /* We don't emulate user-mode writes to page tables */
4016 if ( ring_3(sh_ctxt->ctxt.regs) )
4017 return NULL;
4019 #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
4020 /* Try the virtual TLB first */
4022 struct shadow_vtlb t = {0};
4023 if ( vtlb_lookup(v, vaddr, &t)
4024 && ((t.flags & (_PAGE_PRESENT|_PAGE_RW))
4025 == (_PAGE_PRESENT|_PAGE_RW)) )
4027 flags = t.flags;
4028 gfn = _gfn(t.frame_number);
4030 else
4032 /* Need to do the full lookup, just in case permissions
4033 * have increased since we cached this entry */
4035 #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
4037 /* Walk the guest pagetables */
4038 guest_walk_tables(v, vaddr, &gw, 1);
4039 flags = accumulate_guest_flags(v, &gw);
4040 gfn = guest_l1e_get_gfn(gw.eff_l1e);
4041 sh_audit_gw(v, &gw);
4042 unmap_walk(v, &gw);
4044 #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
4045 /* Remember this translation for next time */
4046 t.page_number = vaddr >> PAGE_SHIFT;
4047 t.frame_number = gfn_x(gfn);
4048 t.flags = flags;
4049 vtlb_insert(v, t);
4052 #endif
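/* A vTLB hit is only trusted above when it already carries the
 * _PAGE_PRESENT|_PAGE_RW permissions this write needs; a miss, or a
 * hit without them, forces the full guest walk, in case the guest has
 * granted more access since the entry was cached. */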
4054 errcode = PFEC_write_access;
4055 if ( !(flags & _PAGE_PRESENT) )
4056 goto page_fault;
4058 errcode |= PFEC_page_present;
4059 if ( !(flags & _PAGE_RW) )
4060 goto page_fault;
4062 mfn = gfn_to_mfn(v->domain, gfn, &p2mt);
4063 if ( p2m_is_ram(p2mt) )
4065 ASSERT(mfn_valid(mfn));
4066 *mfnp = mfn;
4067 v->arch.paging.last_write_was_pt = !!sh_mfn_is_a_page_table(mfn);
4068 return sh_map_domain_page(mfn) + (vaddr & ~PAGE_MASK);
4070 else
4071 return NULL;
4073 page_fault:
4074 if ( is_hvm_vcpu(v) )
4075 hvm_inject_exception(TRAP_page_fault, errcode, vaddr);
4076 else
4077 propagate_page_fault(vaddr, errcode);
4078 return NULL;
4081 static int safe_not_to_verify_write(mfn_t gmfn, void *dst, void *src,
4082 int bytes)
4084 #if (SHADOW_OPTIMIZATIONS & SHOPT_SKIP_VERIFY)
4085 struct page_info *pg = mfn_to_page(gmfn);
4086 if ( !(pg->shadow_flags & SHF_32)
4087 && ((unsigned long)dst & 7) == 0 )
4089 /* Not shadowed 32-bit: aligned 64-bit writes that leave the
4090 * present bit unset are safe to ignore. */
4091 if ( (*(u64*)src & _PAGE_PRESENT) == 0
4092 && (*(u64*)dst & _PAGE_PRESENT) == 0 )
4093 return 1;
4095 else if ( !(pg->shadow_flags & (SHF_PAE|SHF_64))
4096 && ((unsigned long)dst & 3) == 0 )
4098 /* Not shadowed PAE/64-bit: aligned 32-bit writes that leave the
4099 * present bit unset are safe to ignore. */
4100 if ( (*(u32*)src & _PAGE_PRESENT) == 0
4101 && (*(u32*)dst & _PAGE_PRESENT) == 0 )
4102 return 1;
4104 #endif
4105 return 0;
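/* Example of the case being skipped: an aligned 8-byte write storing a
 * value with _PAGE_PRESENT clear over a PTE that is already not
 * present can neither create nor destroy a usable mapping, so
 * revalidating the shadow afterwards would find nothing to do. */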
4109 int
4110 sh_x86_emulate_write(struct vcpu *v, unsigned long vaddr, void *src,
4111 u32 bytes, struct sh_emulate_ctxt *sh_ctxt)
4113 mfn_t mfn;
4114 void *addr;
4115 int skip;
4117 if ( vaddr & (bytes-1) )
4118 return X86EMUL_UNHANDLEABLE;
4120 ASSERT(((vaddr & ~PAGE_MASK) + bytes) <= PAGE_SIZE);
4121 shadow_lock(v->domain);
4123 addr = emulate_map_dest(v, vaddr, sh_ctxt, &mfn);
4124 if ( addr == NULL )
4126 shadow_unlock(v->domain);
4127 return X86EMUL_EXCEPTION;
4130 skip = safe_not_to_verify_write(mfn, addr, src, bytes);
4131 memcpy(addr, src, bytes);
4132 if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, bytes);
4134 /* If we are writing zeros to this page, might want to unshadow */
4135 if ( likely(bytes >= 4) && (*(u32 *)addr == 0) && is_lo_pte(vaddr) )
4136 check_for_early_unshadow(v, mfn);
4137 else
4138 reset_early_unshadow(v);
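/* The zero test above is a heuristic: guests tearing down an address
 * space typically scrub pagetable pages with zeroes, so a zero write
 * to a low PTE slot is taken as a hint that this page's shadow will
 * shortly be useless and is worth dropping early. */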
4140 paging_mark_dirty(v->domain, mfn_x(mfn));
4142 sh_unmap_domain_page(addr);
4143 shadow_audit_tables(v);
4144 shadow_unlock(v->domain);
4145 return X86EMUL_OKAY;
4148 int
4149 sh_x86_emulate_cmpxchg(struct vcpu *v, unsigned long vaddr,
4150 unsigned long old, unsigned long new,
4151 unsigned int bytes, struct sh_emulate_ctxt *sh_ctxt)
4153 mfn_t mfn;
4154 void *addr;
4155 unsigned long prev;
4156 int rv = X86EMUL_OKAY, skip;
4158 ASSERT(bytes <= sizeof(unsigned long));
4159 /* Reject unaligned accesses before taking the shadow lock, so the
4160 * early return below cannot leave the lock held. */
4161 if ( vaddr & (bytes-1) )
4162 return X86EMUL_UNHANDLEABLE;
4163 shadow_lock(v->domain);
4164 addr = emulate_map_dest(v, vaddr, sh_ctxt, &mfn);
4165 if ( addr == NULL )
4167 shadow_unlock(v->domain);
4168 return X86EMUL_EXCEPTION;
4171 skip = safe_not_to_verify_write(mfn, &new, &old, bytes);
4173 switch ( bytes )
4175 case 1: prev = cmpxchg(((u8 *)addr), old, new); break;
4176 case 2: prev = cmpxchg(((u16 *)addr), old, new); break;
4177 case 4: prev = cmpxchg(((u32 *)addr), old, new); break;
4178 case 8: prev = cmpxchg(((u64 *)addr), old, new); break;
4179 default:
4180 SHADOW_PRINTK("cmpxchg of size %i is not supported\n", bytes);
4181 prev = ~old;
4184 if ( prev == old )
4186 if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, bytes);
4188 else
4189 rv = X86EMUL_CMPXCHG_FAILED;
4191 SHADOW_DEBUG(EMULATE, "va %#lx was %#lx expected %#lx"
4192 " wanted %#lx now %#lx bytes %u\n",
4193 vaddr, prev, old, new, *(unsigned long *)addr, bytes);
4195 /* If we are writing zeros to this page, might want to unshadow */
4196 if ( likely(bytes >= 4) && (*(u32 *)addr == 0) && is_lo_pte(vaddr) )
4197 check_for_early_unshadow(v, mfn);
4198 else
4199 reset_early_unshadow(v);
4201 paging_mark_dirty(v->domain, mfn_x(mfn));
4203 sh_unmap_domain_page(addr);
4204 shadow_audit_tables(v);
4205 shadow_unlock(v->domain);
4206 return rv;
4209 int
4210 sh_x86_emulate_cmpxchg8b(struct vcpu *v, unsigned long vaddr,
4211 unsigned long old_lo, unsigned long old_hi,
4212 unsigned long new_lo, unsigned long new_hi,
4213 struct sh_emulate_ctxt *sh_ctxt)
4215 mfn_t mfn;
4216 void *addr;
4217 u64 old, new, prev;
4218 int rv = X86EMUL_OKAY, skip;
4220 if ( vaddr & 7 )
4221 return X86EMUL_UNHANDLEABLE;
4223 shadow_lock(v->domain);
4225 addr = emulate_map_dest(v, vaddr, sh_ctxt, &mfn);
4226 if ( addr == NULL )
4228 shadow_unlock(v->domain);
4229 return X86EMUL_EXCEPTION;
4232 old = (((u64) old_hi) << 32) | (u64) old_lo;
4233 new = (((u64) new_hi) << 32) | (u64) new_lo;
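/* e.g. old_hi == 0x01234567 and old_lo == 0x89abcdef assemble to
 * old == 0x0123456789abcdefULL, and likewise for new. */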
4234 skip = safe_not_to_verify_write(mfn, &new, &old, 8);
4235 prev = cmpxchg(((u64 *)addr), old, new);
4237 if ( prev == old )
4239 if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, 8);
4241 else
4242 rv = X86EMUL_CMPXCHG_FAILED;
4244 /* If we are writing zeros to this page, might want to unshadow */
4245 if ( *(u32 *)addr == 0 )
4246 check_for_early_unshadow(v, mfn);
4247 else
4248 reset_early_unshadow(v);
4250 paging_mark_dirty(v->domain, mfn_x(mfn));
4252 sh_unmap_domain_page(addr);
4253 shadow_audit_tables(v);
4254 shadow_unlock(v->domain);
4255 return rv;
4259 /**************************************************************************/
4260 /* Audit tools */
4262 #if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES
4264 #define AUDIT_FAIL(_level, _fmt, _a...) do { \
4265 printk("Shadow %u-on-%u audit failed at level %i, index %i\n" \
4266 "gl" #_level "mfn = %" PRI_mfn \
4267 " sl" #_level "mfn = %" PRI_mfn \
4268 " &gl" #_level "e = %p &sl" #_level "e = %p" \
4269 " gl" #_level "e = %" SH_PRI_gpte \
4270 " sl" #_level "e = %" SH_PRI_pte "\nError: " _fmt "\n", \
4271 GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS, \
4272 _level, guest_index(gl ## _level ## e), \
4273 mfn_x(gl ## _level ## mfn), mfn_x(sl ## _level ## mfn), \
4274 gl ## _level ## e, sl ## _level ## e, \
4275 gl ## _level ## e->l ## _level, sl ## _level ## e->l ## _level, \
4276 ##_a); \
4277 BUG(); \
4278 done = 1; \
4279 } while (0)
4282 static char * sh_audit_flags(struct vcpu *v, int level,
4283 int gflags, int sflags)
4284 /* Common code for auditing flag bits */
4286 if ( (sflags & _PAGE_PRESENT) && !(gflags & _PAGE_PRESENT) )
4287 return "shadow is present but guest is not present";
4288 if ( (sflags & _PAGE_GLOBAL) && !is_hvm_vcpu(v) )
4289 return "global bit set in PV shadow";
4290 if ( level == 2 && (sflags & _PAGE_PSE) )
4291 return "PS bit set in shadow";
4292 #if SHADOW_PAGING_LEVELS == 3
4293 if ( level == 3 ) return NULL; /* All the other bits are blank in PAEl3 */
4294 #endif
4295 if ( (sflags & _PAGE_PRESENT) && !(gflags & _PAGE_ACCESSED) )
4296 return "accessed bit not propagated";
4297 if ( (level == 1 || (level == 2 && (gflags & _PAGE_PSE)))
4298 && ((sflags & _PAGE_RW) && !(gflags & _PAGE_DIRTY)) )
4299 return "dirty bit not propagated";
4300 if ( (sflags & _PAGE_USER) != (gflags & _PAGE_USER) )
4301 return "user/supervisor bit does not match";
4302 if ( (sflags & _PAGE_NX_BIT) != (gflags & _PAGE_NX_BIT) )
4303 return "NX bit does not match";
4304 if ( (sflags & _PAGE_RW) && !(gflags & _PAGE_RW) )
4305 return "shadow grants write access but guest does not";
4306 return NULL;
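/* Example of a failure this would catch: a guest l1e with _PAGE_RW
 * clear whose shadow l1e still has _PAGE_RW set trips the final check
 * above ("shadow grants write access but guest does not"), which is
 * exactly the inconsistency the write-access revocation in
 * sh_update_cr3() exists to prevent. */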
4309 static inline mfn_t
4310 audit_gfn_to_mfn(struct vcpu *v, gfn_t gfn, mfn_t gmfn)
4311 /* Convert this gfn to an mfn in the manner appropriate for the
4312 * guest pagetable it's used in (gmfn) */
4314 p2m_type_t p2mt;
4315 if ( !shadow_mode_translate(v->domain) )
4316 return _mfn(gfn_x(gfn));
4318 if ( (mfn_to_page(gmfn)->u.inuse.type_info & PGT_type_mask)
4319 != PGT_writable_page )
4320 return _mfn(gfn_x(gfn)); /* This is a paging-disabled shadow */
4321 else
4322 return gfn_to_mfn(v->domain, gfn, &p2mt);
4326 int sh_audit_l1_table(struct vcpu *v, mfn_t sl1mfn, mfn_t x)
4328 guest_l1e_t *gl1e, *gp;
4329 shadow_l1e_t *sl1e;
4330 mfn_t mfn, gmfn, gl1mfn;
4331 gfn_t gfn;
4332 char *s;
4333 int done = 0;
4335 /* Follow the backpointer */
4336 gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
4337 gl1e = gp = sh_map_domain_page(gl1mfn);
4338 SHADOW_FOREACH_L1E(sl1mfn, sl1e, &gl1e, done, {
4340 if ( sh_l1e_is_magic(*sl1e) )
4342 #if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH) && SHADOW_PAGING_LEVELS > 2
4343 if ( sh_l1e_is_gnp(*sl1e) )
4345 if ( guest_l1e_get_flags(*gl1e) & _PAGE_PRESENT )
4346 AUDIT_FAIL(1, "shadow is GNP magic but guest is present");
4348 else
4350 ASSERT(sh_l1e_is_mmio(*sl1e));
4351 gfn = sh_l1e_mmio_get_gfn(*sl1e);
4352 if ( gfn_x(gfn) != gfn_x(guest_l1e_get_gfn(*gl1e)) )
4353 AUDIT_FAIL(1, "shadow MMIO gfn is %" SH_PRI_gfn
4354 " but guest gfn is %" SH_PRI_gfn,
4355 gfn_x(gfn),
4356 gfn_x(guest_l1e_get_gfn(*gl1e)));
4358 #endif
4360 else
4362 s = sh_audit_flags(v, 1, guest_l1e_get_flags(*gl1e),
4363 shadow_l1e_get_flags(*sl1e));
4364 if ( s ) AUDIT_FAIL(1, "%s", s);
4366 if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS )
4368 gfn = guest_l1e_get_gfn(*gl1e);
4369 mfn = shadow_l1e_get_mfn(*sl1e);
4370 gmfn = audit_gfn_to_mfn(v, gfn, gl1mfn);
4371 if ( mfn_x(gmfn) != mfn_x(mfn) )
4372 AUDIT_FAIL(1, "bad translation: gfn %" SH_PRI_gfn
4373 " --> %" PRI_mfn " != mfn %" PRI_mfn,
4374 gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn));
4377 });
4378 sh_unmap_domain_page(gp);
4379 return done;
4382 int sh_audit_fl1_table(struct vcpu *v, mfn_t sl1mfn, mfn_t x)
4384 guest_l1e_t *gl1e, e;
4385 shadow_l1e_t *sl1e;
4386 mfn_t gl1mfn = _mfn(INVALID_MFN);
4387 int f;
4388 int done = 0;
4390 /* fl1 has no useful backpointer: all we can check are flags */
4391 e = guest_l1e_from_gfn(_gfn(0), 0); gl1e = &e; /* Needed for macro */
4392 SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, {
4393 f = shadow_l1e_get_flags(*sl1e);
4394 f &= ~(_PAGE_AVAIL0|_PAGE_AVAIL1|_PAGE_AVAIL2);
4395 if ( !(f == 0
4396 || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
4397 _PAGE_ACCESSED|_PAGE_DIRTY)
4398 || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY)
4399 || sh_l1e_is_magic(*sl1e)) )
4400 AUDIT_FAIL(1, "fl1e has bad flags");
4401 });
4402 return 0;
4405 int sh_audit_l2_table(struct vcpu *v, mfn_t sl2mfn, mfn_t x)
4407 guest_l2e_t *gl2e, *gp;
4408 shadow_l2e_t *sl2e;
4409 mfn_t mfn, gmfn, gl2mfn;
4410 gfn_t gfn;
4411 char *s;
4412 int done = 0;
4414 /* Follow the backpointer */
4415 gl2mfn = _mfn(mfn_to_shadow_page(sl2mfn)->backpointer);
4416 gl2e = gp = sh_map_domain_page(gl2mfn);
4417 SHADOW_FOREACH_L2E(sl2mfn, sl2e, &gl2e, done, v->domain, {
4419 s = sh_audit_flags(v, 2, guest_l2e_get_flags(*gl2e),
4420 shadow_l2e_get_flags(*sl2e));
4421 if ( s ) AUDIT_FAIL(2, "%s", s);
4423 if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS )
4425 gfn = guest_l2e_get_gfn(*gl2e);
4426 mfn = shadow_l2e_get_mfn(*sl2e);
4427 gmfn = (guest_l2e_get_flags(*gl2e) & _PAGE_PSE)
4428 ? get_fl1_shadow_status(v, gfn)
4429 : get_shadow_status(v, audit_gfn_to_mfn(v, gfn, gl2mfn),
4430 SH_type_l1_shadow);
4431 if ( mfn_x(gmfn) != mfn_x(mfn) )
4432 AUDIT_FAIL(2, "bad translation: gfn %" SH_PRI_gfn
4433 " (--> %" PRI_mfn ")"
4434 " --> %" PRI_mfn " != mfn %" PRI_mfn,
4435 gfn_x(gfn),
4436 (guest_l2e_get_flags(*gl2e) & _PAGE_PSE) ? 0
4437 : mfn_x(audit_gfn_to_mfn(v, gfn, gl2mfn)),
4438 mfn_x(gmfn), mfn_x(mfn));
4440 });
4441 sh_unmap_domain_page(gp);
4442 return 0;
4445 #if GUEST_PAGING_LEVELS >= 4
4446 int sh_audit_l3_table(struct vcpu *v, mfn_t sl3mfn, mfn_t x)
4448 guest_l3e_t *gl3e, *gp;
4449 shadow_l3e_t *sl3e;
4450 mfn_t mfn, gmfn, gl3mfn;
4451 gfn_t gfn;
4452 char *s;
4453 int done = 0;
4455 /* Follow the backpointer */
4456 gl3mfn = _mfn(mfn_to_shadow_page(sl3mfn)->backpointer);
4457 gl3e = gp = sh_map_domain_page(gl3mfn);
4458 SHADOW_FOREACH_L3E(sl3mfn, sl3e, &gl3e, done, {
4460 s = sh_audit_flags(v, 3, guest_l3e_get_flags(*gl3e),
4461 shadow_l3e_get_flags(*sl3e));
4462 if ( s ) AUDIT_FAIL(3, "%s", s);
4464 if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS )
4466 gfn = guest_l3e_get_gfn(*gl3e);
4467 mfn = shadow_l3e_get_mfn(*sl3e);
4468 gmfn = get_shadow_status(v, audit_gfn_to_mfn(v, gfn, gl3mfn),
4469 ((GUEST_PAGING_LEVELS == 3 ||
4470 is_pv_32on64_vcpu(v))
4471 && !shadow_mode_external(v->domain)
4472 && (guest_index(gl3e) % 4) == 3)
4473 ? SH_type_l2h_shadow
4474 : SH_type_l2_shadow);
4475 if ( mfn_x(gmfn) != mfn_x(mfn) )
4476 AUDIT_FAIL(3, "bad translation: gfn %" SH_PRI_gfn
4477 " --> %" PRI_mfn " != mfn %" PRI_mfn,
4478 gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn));
4480 });
4481 sh_unmap_domain_page(gp);
4482 return 0;
4485 int sh_audit_l4_table(struct vcpu *v, mfn_t sl4mfn, mfn_t x)
4487 guest_l4e_t *gl4e, *gp;
4488 shadow_l4e_t *sl4e;
4489 mfn_t mfn, gmfn, gl4mfn;
4490 gfn_t gfn;
4491 char *s;
4492 int done = 0;
4494 /* Follow the backpointer */
4495 gl4mfn = _mfn(mfn_to_shadow_page(sl4mfn)->backpointer);
4496 gl4e = gp = sh_map_domain_page(gl4mfn);
4497 SHADOW_FOREACH_L4E(sl4mfn, sl4e, &gl4e, done, v->domain,
4499 s = sh_audit_flags(v, 4, guest_l4e_get_flags(*gl4e),
4500 shadow_l4e_get_flags(*sl4e));
4501 if ( s ) AUDIT_FAIL(4, "%s", s);
4503 if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS )
4505 gfn = guest_l4e_get_gfn(*gl4e);
4506 mfn = shadow_l4e_get_mfn(*sl4e);
4507 gmfn = get_shadow_status(v, audit_gfn_to_mfn(v, gfn, gl4mfn),
4508 SH_type_l3_shadow);
4509 if ( mfn_x(gmfn) != mfn_x(mfn) )
4510 AUDIT_FAIL(4, "bad translation: gfn %" SH_PRI_gfn
4511 " --> %" PRI_mfn " != mfn %" PRI_mfn,
4512 gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn));
4514 });
4515 sh_unmap_domain_page(gp);
4516 return 0;
4518 #endif /* GUEST_PAGING_LEVELS >= 4 */
4521 #undef AUDIT_FAIL
4523 #endif /* Audit code */
4525 /**************************************************************************/
4526 /* Entry points into this mode of the shadow code.
4527 * This will all be mangled by the preprocessor to uniquify everything. */
4528 struct paging_mode sh_paging_mode = {
4529 .page_fault = sh_page_fault,
4530 .invlpg = sh_invlpg,
4531 .gva_to_gfn = sh_gva_to_gfn,
4532 .update_cr3 = sh_update_cr3,
4533 .update_paging_modes = shadow_update_paging_modes,
4534 .write_p2m_entry = shadow_write_p2m_entry,
4535 .write_guest_entry = shadow_write_guest_entry,
4536 .cmpxchg_guest_entry = shadow_cmpxchg_guest_entry,
4537 .guest_map_l1e = sh_guest_map_l1e,
4538 .guest_get_eff_l1e = sh_guest_get_eff_l1e,
4539 .guest_levels = GUEST_PAGING_LEVELS,
4540 .shadow.detach_old_tables = sh_detach_old_tables,
4541 .shadow.x86_emulate_write = sh_x86_emulate_write,
4542 .shadow.x86_emulate_cmpxchg = sh_x86_emulate_cmpxchg,
4543 .shadow.x86_emulate_cmpxchg8b = sh_x86_emulate_cmpxchg8b,
4544 .shadow.make_monitor_table = sh_make_monitor_table,
4545 .shadow.destroy_monitor_table = sh_destroy_monitor_table,
4546 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
4547 .shadow.guess_wrmap = sh_guess_wrmap,
4548 #endif
4549 .shadow.shadow_levels = SHADOW_PAGING_LEVELS,
4550 };
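/* The per-level uniquification mentioned above is done with
 * token-pasting macros in the shadow headers.  A hypothetical sketch
 * of the idea (these macro names are illustrative, not the real ones):
 */
#if 0
#define SH_MANGLE_(name, sl, gl)  name ## _ ## sl ## _ ## gl
#define SH_MANGLE(name, sl, gl)   SH_MANGLE_(name, sl, gl)
/* SH_MANGLE(sh_paging_mode, SHADOW_PAGING_LEVELS, GUEST_PAGING_LEVELS)
 * expands to e.g. sh_paging_mode_3_3 in a 3-on-3 build of this file. */
#endif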
4552 /*
4553 * Local variables:
4554 * mode: C
4555 * c-set-style: "BSD"
4556 * c-basic-offset: 4
4557 * indent-tabs-mode: nil
4558 * End:
4559 */