xen/arch/x86/mm/shadow/multi.c @ 11688:477a0084ff47

[XEN] Recompute shadows of PAE PSE l2es when needed.
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>

author   Tim Deegan <tim.deegan@xensource.com>
date     Fri Sep 29 11:57:06 2006 +0100
parents  b6ee084892da
children 6e932f32662c
1 /******************************************************************************
2 * arch/x86/mm/shadow/multi.c
3 *
4 * Simple, mostly-synchronous shadow page tables.
5 * Parts of this code are Copyright (c) 2006 by XenSource Inc.
6 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
7 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
24 // DESIGN QUESTIONS:
25 // Why use subshadows for PAE guests?
26 // - reduces pressure in the hash table
27 // - reduces shadow size (64-vs-4096 bytes of shadow for 32 bytes of guest L3)
28 // - would need to find space in the page_info to store 7 more bits of
29 // backpointer
30 // - independent shadows of 32 byte chunks makes it non-obvious how to quickly
31 // figure out when to demote the guest page from l3 status
32 //
33 // PAE Xen HVM guests are restricted to 8GB of pseudo-physical address space.
34 // - Want to map the P2M table into the 16MB RO_MPT hole in Xen's address
35 // space for both PV and HVM guests.
36 //
38 #include <xen/config.h>
39 #include <xen/types.h>
40 #include <xen/mm.h>
41 #include <xen/trace.h>
42 #include <xen/sched.h>
43 #include <xen/perfc.h>
44 #include <xen/domain_page.h>
45 #include <asm/page.h>
46 #include <asm/current.h>
47 #include <asm/shadow.h>
48 #include <asm/flushtlb.h>
49 #include <asm/hvm/hvm.h>
50 #include "private.h"
51 #include "types.h"
53 /* The first cut: an absolutely synchronous, trap-and-emulate version,
54 * supporting only HVM guests (and so only "external" shadow mode).
55 *
56 * THINGS TO DO LATER:
57 *
58 * FIX GVA_TO_GPA
59 * The current interface returns an unsigned long, which is not big enough
60 * to hold a physical address in PAE. Should return a gfn instead.
61 *
62 * TEARDOWN HEURISTICS
63 * Also: have a heuristic for when to destroy a previous paging-mode's
64 * shadows. When a guest is done with its start-of-day 32-bit tables
65 * and reuses the memory we want to drop those shadows. Start with
66 * shadows in a page in two modes as a hint, but beware of clever tricks
67 * like reusing a pagetable for both PAE and 64-bit during boot...
68 *
69 * PAE LINEAR MAPS
70 * Rework shadow_get_l*e() to have the option of using map_domain_page()
71 * instead of linear maps. Add appropriate unmap_l*e calls in the users.
72 * Then we can test the speed difference made by linear maps. If the
73 * map_domain_page() version is OK on PAE, we could maybe allow a lightweight
74 * l3-and-l2h-only shadow mode for PAE PV guests that would allow them
75 * to share l2h pages again.
76 *
77 * PAE L3 COPYING
78 * In this code, we copy all 32 bytes of a PAE L3 every time we change an
79 * entry in it, and every time we change CR3. We copy it for the linear
80 * mappings (ugh! PAE linear mappings) and we copy it to the low-memory
81 * buffer so it fits in CR3. Maybe we can avoid some of this recopying
82 * by using the shadow directly in some places.
83 * Also, for SMP, need to actually respond to seeing shadow.pae_flip_pending.
84 *
85 * GUEST_WALK_TABLES TLB FLUSH COALESCE
86 * guest_walk_tables can do up to three remote TLB flushes as it walks to
87 * the first l1 of a new pagetable. Should coalesce the flushes to the end,
88 * and if we do flush, re-do the walk. If anything has changed, then
89 * pause all the other vcpus and do the walk *again*.
90 *
91 * WP DISABLED
92 * Consider how to implement having the WP bit of CR0 set to 0.
93 * Since we need to be able to cause write faults to pagetables, this might
94 * end up looking like not having the (guest) pagetables present at all in
95 * HVM guests...
96 *
97 * PSE disabled / PSE36
98 * We don't support any modes other than PSE enabled, PSE36 disabled.
99 * Neither of those would be hard to change, but we'd need to be able to
100 * deal with shadows made in one mode and used in another.
101 */
103 #define FETCH_TYPE_PREFETCH 1
104 #define FETCH_TYPE_DEMAND 2
105 #define FETCH_TYPE_WRITE 4
106 typedef enum {
107 ft_prefetch = FETCH_TYPE_PREFETCH,
108 ft_demand_read = FETCH_TYPE_DEMAND,
109 ft_demand_write = FETCH_TYPE_DEMAND | FETCH_TYPE_WRITE,
110 } fetch_type_t;
112 #ifdef DEBUG_TRACE_DUMP
113 static char *fetch_type_names[] = {
114 [ft_prefetch] "prefetch",
115 [ft_demand_read] "demand read",
116 [ft_demand_write] "demand write",
117 };
118 #endif
120 /* XXX forward declarations */
121 #if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
122 static unsigned long hvm_pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab, int clear_res);
123 #endif
124 static inline void sh_update_linear_entries(struct vcpu *v);
126 /**************************************************************************/
127 /* Hash table mapping from guest pagetables to shadows
128 *
129 * Normal case: maps the mfn of a guest page to the mfn of its shadow page.
130 * FL1's: maps the *gfn* of the start of a superpage to the mfn of a
131 * shadow L1 which maps its "splinters".
132 * PAE CR3s: maps the 32-byte aligned, 32-bit CR3 value to the mfn of the
133 * PAE L3 info page for that CR3 value.
134 */
136 static inline mfn_t
137 get_fl1_shadow_status(struct vcpu *v, gfn_t gfn)
138 /* Look for FL1 shadows in the hash table */
139 {
140 mfn_t smfn = shadow_hash_lookup(v, gfn_x(gfn),
141 PGC_SH_fl1_shadow >> PGC_SH_type_shift);
143 if ( unlikely(shadow_mode_log_dirty(v->domain) && valid_mfn(smfn)) )
144 {
145 struct page_info *page = mfn_to_page(smfn);
146 if ( !(page->count_info & PGC_SH_log_dirty) )
147 shadow_convert_to_log_dirty(v, smfn);
148 }
150 return smfn;
151 }
153 static inline mfn_t
154 get_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type)
155 /* Look for shadows in the hash table */
156 {
157 mfn_t smfn = shadow_hash_lookup(v, mfn_x(gmfn),
158 shadow_type >> PGC_SH_type_shift);
159 perfc_incrc(shadow_get_shadow_status);
161 if ( unlikely(shadow_mode_log_dirty(v->domain) && valid_mfn(smfn)) )
162 {
163 struct page_info *page = mfn_to_page(smfn);
164 if ( !(page->count_info & PGC_SH_log_dirty) )
165 shadow_convert_to_log_dirty(v, smfn);
166 }
168 return smfn;
169 }
171 static inline void
172 set_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
173 /* Put an FL1 shadow into the hash table */
174 {
175 SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
176 gfn_x(gfn), PGC_SH_fl1_shadow, mfn_x(smfn));
178 if ( unlikely(shadow_mode_log_dirty(v->domain)) )
179 // mark this shadow as a log dirty shadow...
180 set_bit(_PGC_SH_log_dirty, &mfn_to_page(smfn)->count_info);
181 else
182 clear_bit(_PGC_SH_log_dirty, &mfn_to_page(smfn)->count_info);
184 shadow_hash_insert(v, gfn_x(gfn),
185 PGC_SH_fl1_shadow >> PGC_SH_type_shift, smfn);
186 }
188 static inline void
189 set_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
190 /* Put a shadow into the hash table */
191 {
192 struct domain *d = v->domain;
193 int res;
195 SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, type=%08x, smfn=%05lx\n",
196 d->domain_id, v->vcpu_id, mfn_x(gmfn),
197 shadow_type, mfn_x(smfn));
199 if ( unlikely(shadow_mode_log_dirty(d)) )
200 // mark this shadow as a log dirty shadow...
201 set_bit(_PGC_SH_log_dirty, &mfn_to_page(smfn)->count_info);
202 else
203 clear_bit(_PGC_SH_log_dirty, &mfn_to_page(smfn)->count_info);
205 res = get_page(mfn_to_page(gmfn), d);
206 ASSERT(res == 1);
208 shadow_hash_insert(v, mfn_x(gmfn), shadow_type >> PGC_SH_type_shift,
209 smfn);
210 }
212 static inline void
213 delete_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
214 /* Remove a shadow from the hash table */
215 {
216 SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
217 gfn_x(gfn), PGC_SH_fl1_shadow, mfn_x(smfn));
219 shadow_hash_delete(v, gfn_x(gfn),
220 PGC_SH_fl1_shadow >> PGC_SH_type_shift, smfn);
221 }
223 static inline void
224 delete_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
225 /* Remove a shadow from the hash table */
226 {
227 SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, type=%08x, smfn=%05lx\n",
228 v->domain->domain_id, v->vcpu_id,
229 mfn_x(gmfn), shadow_type, mfn_x(smfn));
230 shadow_hash_delete(v, mfn_x(gmfn),
231 shadow_type >> PGC_SH_type_shift, smfn);
232 put_page(mfn_to_page(gmfn));
233 }
235 /**************************************************************************/
236 /* CPU feature support querying */
238 static inline int
239 guest_supports_superpages(struct vcpu *v)
240 {
241 /* The _PAGE_PSE bit must be honoured in HVM guests, whenever
242 * CR4.PSE is set or the guest is in PAE or long mode */
243 return (hvm_guest(v) && (GUEST_PAGING_LEVELS != 2
244 || (hvm_get_guest_ctrl_reg(v, 4) & X86_CR4_PSE)));
245 }
247 static inline int
248 guest_supports_nx(struct vcpu *v)
249 {
250 if ( !hvm_guest(v) )
251 return cpu_has_nx;
253 // XXX - fix this!
254 return 1;
255 }
258 /**************************************************************************/
259 /* Functions for walking the guest page tables */
262 /* Walk the guest pagetables, filling the walk_t with what we see.
263 * Takes an uninitialised walk_t. The caller must call unmap_walk()
264 * on the walk_t before discarding it or calling guest_walk_tables again.
265 * If "guest_op" is non-zero, we are serving a genuine guest memory access,
266 * and must (a) be under the shadow lock, and (b) remove write access
267 * from any guest PT pages we see, as we will be using their contents to
268 * perform shadow updates.
269 * Returns 0 for success or non-zero if the guest pagetables are malformed.
270 * N.B. Finding a not-present entry does not cause a non-zero return code. */
271 static inline int
272 guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op)
273 {
274 ASSERT(!guest_op || shadow_lock_is_acquired(v->domain));
276 perfc_incrc(shadow_guest_walk);
277 memset(gw, 0, sizeof(*gw));
278 gw->va = va;
280 #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
281 #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
282 /* Get l4e from the top level table */
283 gw->l4mfn = pagetable_get_mfn(v->arch.guest_table);
284 gw->l4e = (guest_l4e_t *)v->arch.guest_vtable + guest_l4_table_offset(va);
285 /* Walk down to the l3e */
286 if ( !(guest_l4e_get_flags(*gw->l4e) & _PAGE_PRESENT) ) return 0;
287 gw->l3mfn = vcpu_gfn_to_mfn(v, guest_l4e_get_gfn(*gw->l4e));
288 if ( !valid_mfn(gw->l3mfn) ) return 1;
289 /* This mfn is a pagetable: make sure the guest can't write to it. */
290 if ( guest_op && shadow_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
291 flush_tlb_mask(v->domain->domain_dirty_cpumask);
292 gw->l3e = ((guest_l3e_t *)sh_map_domain_page(gw->l3mfn))
293 + guest_l3_table_offset(va);
294 #else /* PAE only... */
295 /* Get l3e from the top level table */
296 gw->l3mfn = pagetable_get_mfn(v->arch.guest_table);
297 gw->l3e = (guest_l3e_t *)v->arch.guest_vtable + guest_l3_table_offset(va);
298 #endif /* PAE or 64... */
299 /* Walk down to the l2e */
300 if ( !(guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT) ) return 0;
301 gw->l2mfn = vcpu_gfn_to_mfn(v, guest_l3e_get_gfn(*gw->l3e));
302 if ( !valid_mfn(gw->l2mfn) ) return 1;
303 /* This mfn is a pagetable: make sure the guest can't write to it. */
304 if ( guest_op && shadow_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
305 flush_tlb_mask(v->domain->domain_dirty_cpumask);
306 gw->l2e = ((guest_l2e_t *)sh_map_domain_page(gw->l2mfn))
307 + guest_l2_table_offset(va);
308 #else /* 32-bit only... */
309 /* Get l2e from the top level table */
310 gw->l2mfn = pagetable_get_mfn(v->arch.guest_table);
311 gw->l2e = (guest_l2e_t *)v->arch.guest_vtable + guest_l2_table_offset(va);
312 #endif /* All levels... */
314 if ( !(guest_l2e_get_flags(*gw->l2e) & _PAGE_PRESENT) ) return 0;
315 if ( guest_supports_superpages(v) &&
316 (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE) )
317 {
318 /* Special case: this guest VA is in a PSE superpage, so there's
319 * no guest l1e. We make one up so that the propagation code
320 * can generate a shadow l1 table. Start with the gfn of the
321 * first 4k-page of the superpage. */
322 gfn_t start = guest_l2e_get_gfn(*gw->l2e);
323 /* Grant full access in the l1e, since all the guest entry's
324 * access controls are enforced in the shadow l2e. This lets
325 * us reflect l2 changes later without touching the l1s. */
326 int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
327 _PAGE_ACCESSED|_PAGE_DIRTY);
328 /* PSE level 2 entries use bit 12 for PAT; propagate it to bit 7
329 * of the level 1 */
330 if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE_PAT) )
331 flags |= _PAGE_PAT;
332 /* Increment the pfn by the right number of 4k pages.
333 * The ~0x1 is to mask out the PAT bit mentioned above. */
334 start = _gfn((gfn_x(start) & ~0x1) + guest_l1_table_offset(va));
335 gw->eff_l1e = guest_l1e_from_gfn(start, flags);
336 gw->l1e = NULL;
337 gw->l1mfn = _mfn(INVALID_MFN);
338 }
339 else
340 {
341 /* Not a superpage: carry on and find the l1e. */
342 gw->l1mfn = vcpu_gfn_to_mfn(v, guest_l2e_get_gfn(*gw->l2e));
343 if ( !valid_mfn(gw->l1mfn) ) return 1;
344 /* This mfn is a pagetable: make sure the guest can't write to it. */
345 if ( guest_op
346 && shadow_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
347 flush_tlb_mask(v->domain->domain_dirty_cpumask);
348 gw->l1e = ((guest_l1e_t *)sh_map_domain_page(gw->l1mfn))
349 + guest_l1_table_offset(va);
350 gw->eff_l1e = *gw->l1e;
351 }
353 return 0;
354 }
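/* Illustrative sketch (not part of the build): the arithmetic used above to
 * manufacture an effective l1e for a PSE superpage.  Bit 0 of the l2e's gfn
 * is really the PAT bit (bit 12 of the entry), so it is masked off here and
 * propagated to _PAGE_PAT in the code above.  The function name and numbers
 * are hypothetical; the offset mask is the PAE/64-bit one (512 l1 entries). */
#if 0
#include <stdint.h>
#include <assert.h>
static void pse_splinter_gfn_example(void)
{
    uint64_t va        = 0x00654321ULL;        /* faulting virtual address  */
    uint64_t start_gfn = 0x00400ULL | 0x1;     /* l2e gfn; bit 0 is l2 PAT  */
    uint64_t l1_offset = (va >> 12) & 0x1ff;   /* guest_l1_table_offset(va) */
    uint64_t gfn       = (start_gfn & ~0x1ULL) + l1_offset;

    assert(gfn == 0x454);    /* 4k frame of va within the superpage */
}
#endif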
356 /* Given a walk_t, translate the gw->va into the guest's notion of the
357 * corresponding frame number. */
358 static inline gfn_t
359 guest_walk_to_gfn(walk_t *gw)
360 {
361 if ( !(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT) )
362 return _gfn(INVALID_GFN);
363 return guest_l1e_get_gfn(gw->eff_l1e);
364 }
366 /* Given a walk_t, translate the gw->va into the guest's notion of the
367 * corresponding physical address. */
368 static inline paddr_t
369 guest_walk_to_gpa(walk_t *gw)
370 {
371 if ( !(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT) )
372 return 0;
373 return guest_l1e_get_paddr(gw->eff_l1e) + (gw->va & ~PAGE_MASK);
374 }
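/* Illustrative arithmetic (not part of the build): the gpa is simply the
 * frame address taken from the effective l1e plus the page offset of the
 * virtual address.  The numbers below are made up. */
#if 0
#include <stdint.h>
#include <assert.h>
static void walk_to_gpa_example(void)
{
    uint64_t l1e_paddr = 0x12345ULL << 12;            /* frame mapped by eff_l1e */
    uint64_t va        = 0xc0101abcULL;               /* gw->va                  */
    uint64_t gpa       = l1e_paddr + (va & 0xfffULL); /* + (va & ~PAGE_MASK)     */

    assert(gpa == 0x12345abcULL);
}
#endif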
377 /* Unmap (and reinitialise) a guest walk.
378 * Call this to dispose of any walk filled in by guest_walk_tables() */
379 static void unmap_walk(struct vcpu *v, walk_t *gw)
380 {
381 #if GUEST_PAGING_LEVELS >= 3
382 #if GUEST_PAGING_LEVELS >= 4
383 if ( gw->l3e != NULL ) sh_unmap_domain_page(gw->l3e);
384 #endif
385 if ( gw->l2e != NULL ) sh_unmap_domain_page(gw->l2e);
386 #endif
387 if ( gw->l1e != NULL ) sh_unmap_domain_page(gw->l1e);
388 #ifdef DEBUG
389 memset(gw, 0, sizeof(*gw));
390 #endif
391 }
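/* Illustrative caller sketch (not part of the build): the walk/unmap
 * protocol described above guest_walk_tables(), in the shape used by
 * e.g. sh_guest_get_eff_l1e() later in this file.  With a non-zero
 * guest_op the shadow lock is held around the walk, and unmap_walk()
 * is always called before the walk_t goes out of scope. */
#if 0
static gfn_t walk_va_to_gfn_example(struct vcpu *v, unsigned long va)
{
    walk_t gw;
    gfn_t gfn;

    shadow_lock(v->domain);
    (void) guest_walk_tables(v, va, &gw, 1 /* guest_op */);
    gfn = guest_walk_to_gfn(&gw);     /* _gfn(INVALID_GFN) if not present */
    unmap_walk(v, &gw);
    shadow_unlock(v->domain);

    return gfn;
}
#endif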
394 /* Pretty-print the contents of a guest-walk */
395 static inline void print_gw(walk_t *gw)
396 {
397 SHADOW_PRINTK("GUEST WALK TO %#lx:\n", gw->va);
398 #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
399 #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
400 SHADOW_PRINTK(" l4mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l4mfn));
401 SHADOW_PRINTK(" l4e=%p\n", gw->l4e);
402 if ( gw->l4e )
403 SHADOW_PRINTK(" *l4e=%" SH_PRI_gpte "\n", gw->l4e->l4);
404 #endif /* PAE or 64... */
405 SHADOW_PRINTK(" l3mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l3mfn));
406 SHADOW_PRINTK(" l3e=%p\n", gw->l3e);
407 if ( gw->l3e )
408 SHADOW_PRINTK(" *l3e=%" SH_PRI_gpte "\n", gw->l3e->l3);
409 #endif /* All levels... */
410 SHADOW_PRINTK(" l2mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l2mfn));
411 SHADOW_PRINTK(" l2e=%p\n", gw->l2e);
412 if ( gw->l2e )
413 SHADOW_PRINTK(" *l2e=%" SH_PRI_gpte "\n", gw->l2e->l2);
414 SHADOW_PRINTK(" l1mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l1mfn));
415 SHADOW_PRINTK(" l1e=%p\n", gw->l1e);
416 if ( gw->l1e )
417 SHADOW_PRINTK(" *l1e=%" SH_PRI_gpte "\n", gw->l1e->l1);
418 SHADOW_PRINTK(" eff_l1e=%" SH_PRI_gpte "\n", gw->eff_l1e.l1);
419 }
422 #if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES
423 /* Lightweight audit: pass all the shadows associated with this guest walk
424 * through the audit mechanisms */
425 static void sh_audit_gw(struct vcpu *v, walk_t *gw)
426 {
427 mfn_t smfn;
429 if ( !(SHADOW_AUDIT_ENABLE) )
430 return;
432 #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
433 #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
434 if ( valid_mfn(gw->l4mfn)
435 && valid_mfn((smfn = get_shadow_status(v, gw->l4mfn,
436 PGC_SH_l4_shadow))) )
437 (void) sh_audit_l4_table(v, smfn, _mfn(INVALID_MFN));
438 #endif /* PAE or 64... */
439 if ( valid_mfn(gw->l3mfn)
440 && valid_mfn((smfn = get_shadow_status(v, gw->l3mfn,
441 PGC_SH_l3_shadow))) )
442 (void) sh_audit_l3_table(v, smfn, _mfn(INVALID_MFN));
443 #endif /* All levels... */
444 if ( valid_mfn(gw->l2mfn) )
445 {
446 if ( valid_mfn((smfn = get_shadow_status(v, gw->l2mfn,
447 PGC_SH_l2_shadow))) )
448 (void) sh_audit_l2_table(v, smfn, _mfn(INVALID_MFN));
449 #if GUEST_PAGING_LEVELS == 3
450 if ( valid_mfn((smfn = get_shadow_status(v, gw->l2mfn,
451 PGC_SH_l2h_shadow))) )
452 (void) sh_audit_l2_table(v, smfn, _mfn(INVALID_MFN));
453 #endif
454 }
455 if ( valid_mfn(gw->l1mfn)
456 && valid_mfn((smfn = get_shadow_status(v, gw->l1mfn,
457 PGC_SH_l1_shadow))) )
458 (void) sh_audit_l1_table(v, smfn, _mfn(INVALID_MFN));
459 else if ( gw->l2e
460 && (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE)
461 && valid_mfn(
462 (smfn = get_fl1_shadow_status(v, guest_l2e_get_gfn(*gw->l2e)))) )
463 (void) sh_audit_fl1_table(v, smfn, _mfn(INVALID_MFN));
464 }
466 #else
467 #define sh_audit_gw(_v, _gw) do {} while(0)
468 #endif /* audit code */
472 /**************************************************************************/
473 /* Function to write to the guest tables, for propagating accessed and
474 * dirty bits from the shadow to the guest.
475 * Takes a guest mfn, a pointer to the guest entry, the level of pagetable,
476 * and an operation type. The guest entry is always passed as an l1e:
477 * since we only ever write flags, that's OK.
478 * Returns the new flag bits of the guest entry. */
480 static u32 guest_set_ad_bits(struct vcpu *v,
481 mfn_t gmfn,
482 guest_l1e_t *ep,
483 unsigned int level,
484 fetch_type_t ft)
485 {
486 u32 flags;
487 int res = 0;
489 ASSERT(valid_mfn(gmfn)
490 && (sh_mfn_is_a_page_table(gmfn)
491 || ((mfn_to_page(gmfn)->u.inuse.type_info & PGT_count_mask)
492 == 0)));
493 ASSERT(ep && !(((unsigned long)ep) & ((sizeof *ep) - 1)));
494 ASSERT(level <= GUEST_PAGING_LEVELS);
495 ASSERT(ft == ft_demand_read || ft == ft_demand_write);
496 ASSERT(shadow_lock_is_acquired(v->domain));
498 flags = guest_l1e_get_flags(*ep);
500 /* PAE l3s do not have A and D bits */
501 if ( unlikely(GUEST_PAGING_LEVELS == 3 && level == 3) )
502 return flags;
504 /* Need the D bit as well for writes, in L1es and PSE L2es. */
505 if ( ft == ft_demand_write
506 && (level == 1 ||
507 (level == 2 && (flags & _PAGE_PSE) && guest_supports_superpages(v))) )
508 {
509 if ( (flags & (_PAGE_DIRTY | _PAGE_ACCESSED))
510 == (_PAGE_DIRTY | _PAGE_ACCESSED) )
511 return flags; /* Guest already has A and D bits set */
512 flags |= _PAGE_DIRTY | _PAGE_ACCESSED;
513 perfc_incrc(shadow_ad_update);
514 }
515 else
516 {
517 if ( flags & _PAGE_ACCESSED )
518 return flags; /* Guest already has A bit set */
519 flags |= _PAGE_ACCESSED;
520 perfc_incrc(shadow_a_update);
521 }
523 /* Set the bit(s) */
524 sh_mark_dirty(v->domain, gmfn);
525 SHADOW_DEBUG(A_AND_D, "gfn = %" SH_PRI_gfn ", "
526 "old flags = %#x, new flags = %#x\n",
527 gfn_x(guest_l1e_get_gfn(*ep)), guest_l1e_get_flags(*ep), flags);
528 *ep = guest_l1e_from_gfn(guest_l1e_get_gfn(*ep), flags);
530 /* Propagate this change to any existing shadows */
531 res = __shadow_validate_guest_entry(v, gmfn, ep, sizeof(*ep));
533 /* We should never need to flush the TLB or recopy PAE entries */
534 ASSERT((res == 0) || (res == SHADOW_SET_CHANGED));
536 return flags;
537 }
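/* Illustrative sketch (not part of the build): the decision made by
 * guest_set_ad_bits(), reduced to plain flag arithmetic.  "is_pae_l3",
 * "level" and "pse" are hypothetical parameters standing in for the checks
 * on GUEST_PAGING_LEVELS, the pagetable level and _PAGE_PSE above. */
#if 0
static unsigned int ad_bits_to_set(int is_pae_l3, int level, int pse,
                                   int demand_write)
{
    if ( is_pae_l3 )
        return 0;                             /* PAE l3es have no A/D bits   */
    if ( demand_write && (level == 1 || (level == 2 && pse)) )
        return _PAGE_ACCESSED | _PAGE_DIRTY;  /* writes need the D bit too   */
    return _PAGE_ACCESSED;                    /* otherwise only the A bit    */
}
#endif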
539 #if (CONFIG_PAGING_LEVELS == GUEST_PAGING_LEVELS) && (CONFIG_PAGING_LEVELS == SHADOW_PAGING_LEVELS)
540 void *
541 sh_guest_map_l1e(struct vcpu *v, unsigned long addr,
542 unsigned long *gl1mfn)
543 {
544 void *pl1e = NULL;
545 walk_t gw;
547 ASSERT(shadow_mode_translate(v->domain));
549 // XXX -- this is expensive, but it's easy to cobble together...
550 // FIXME!
552 shadow_lock(v->domain);
553 guest_walk_tables(v, addr, &gw, 1);
555 if ( gw.l2e &&
556 (guest_l2e_get_flags(*gw.l2e) & _PAGE_PRESENT) &&
557 !(guest_supports_superpages(v) && (guest_l2e_get_flags(*gw.l2e) & _PAGE_PSE)) )
558 {
559 if ( gl1mfn )
560 *gl1mfn = mfn_x(gw.l1mfn);
561 pl1e = map_domain_page(mfn_x(gw.l1mfn)) +
562 (guest_l1_table_offset(addr) * sizeof(guest_l1e_t));
563 }
565 unmap_walk(v, &gw);
566 shadow_unlock(v->domain);
568 return pl1e;
569 }
571 void
572 sh_guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e)
573 {
574 walk_t gw;
576 ASSERT(shadow_mode_translate(v->domain));
578 // XXX -- this is expensive, but it's easy to cobble together...
579 // FIXME!
581 shadow_lock(v->domain);
582 guest_walk_tables(v, addr, &gw, 1);
583 *(guest_l1e_t *)eff_l1e = gw.eff_l1e;
584 unmap_walk(v, &gw);
585 shadow_unlock(v->domain);
586 }
587 #endif /* CONFIG==SHADOW==GUEST */
589 /**************************************************************************/
590 /* Functions to compute the correct index into a shadow page, given an
591 * index into the guest page (as returned by guest_get_index()).
592 * This is trivial when the shadow and guest use the same sized PTEs, but
593 * gets more interesting when those sizes are mismatched (e.g. 32-bit guest,
594 * PAE- or 64-bit shadows).
595 *
596 * These functions also increment the shadow mfn, when necessary. When PTE
597 * sizes are mismatched, it takes 2 shadow L1 pages for a single guest L1
598 * page. In this case, we allocate 2 contiguous pages for the shadow L1, and
599 * use simple pointer arithmetic on a pointer to the guest L1e to figure out
600 * which shadow page we really want. Similarly, when PTE sizes are
601 * mismatched, we shadow a guest L2 page with 4 shadow L2 pages. (The easiest
602 * way to see this is: a 32-bit guest L2 page maps 4GB of virtual address
603 * space, while a PAE- or 64-bit shadow L2 page maps 1GB of virtual address
604 * space.)
605 *
606 * For PAE guests, for every 32-bytes of guest L3 page table, we use 64-bytes
607 * of shadow (to store both the shadow, and the info that would normally be
608 * stored in page_info fields). This arrangement allows the shadow and the
609 * "page_info" fields to always be stored in the same page (in fact, in
610 * the same cache line), avoiding an extra call to map_domain_page().
611 */
613 static inline u32
614 guest_index(void *ptr)
615 {
616 return (u32)((unsigned long)ptr & ~PAGE_MASK) / sizeof(guest_l1e_t);
617 }
619 static inline u32
620 shadow_l1_index(mfn_t *smfn, u32 guest_index)
621 {
622 #if (GUEST_PAGING_LEVELS == 2) && (SHADOW_PAGING_LEVELS != 2)
623 *smfn = _mfn(mfn_x(*smfn) +
624 (guest_index / SHADOW_L1_PAGETABLE_ENTRIES));
625 return (guest_index % SHADOW_L1_PAGETABLE_ENTRIES);
626 #else
627 return guest_index;
628 #endif
629 }
631 static inline u32
632 shadow_l2_index(mfn_t *smfn, u32 guest_index)
633 {
634 #if (GUEST_PAGING_LEVELS == 2) && (SHADOW_PAGING_LEVELS != 2)
635 // Because we use 2 shadow l2 entries for each guest entry, the number of
636 // guest entries per shadow page is SHADOW_L2_PAGETABLE_ENTRIES/2
637 //
638 *smfn = _mfn(mfn_x(*smfn) +
639 (guest_index / (SHADOW_L2_PAGETABLE_ENTRIES / 2)));
641 // We multiply by two to get the index of the first of the two entries
642 // used to shadow the specified guest entry.
643 return (guest_index % (SHADOW_L2_PAGETABLE_ENTRIES / 2)) * 2;
644 #else
645 return guest_index;
646 #endif
647 }
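/* Illustrative arithmetic (not part of the build): where a 32-bit guest
 * entry lands when shadowed with PAE-sized PTEs, as computed by
 * shadow_l1_index() and shadow_l2_index() above for the 2-on-3 case.
 * 512 and 256 are SHADOW_L1_PAGETABLE_ENTRIES and
 * SHADOW_L2_PAGETABLE_ENTRIES/2 for PAE; the guest indices are made up. */
#if 0
#include <assert.h>
static void two_on_three_index_example(void)
{
    /* Guest l1 index 700 (of 1024) falls in the second page of the
     * two-page shadow l1, at slot 700 - 512 = 188. */
    unsigned int g1 = 700;
    assert(g1 / 512 == 1 && g1 % 512 == 188);

    /* Guest l2 index 300 (of 1024) falls in the second of the four shadow
     * l2 pages, and occupies the pair of slots starting at
     * (300 % 256) * 2 = 88. */
    unsigned int g2 = 300;
    assert(g2 / 256 == 1 && (g2 % 256) * 2 == 88);
}
#endif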
649 #if GUEST_PAGING_LEVELS >= 3
651 static inline u32
652 shadow_l3_index(mfn_t *smfn, u32 guest_index)
653 {
654 #if GUEST_PAGING_LEVELS == 3
655 u32 group_id;
657 // Because we use twice the space in L3 shadows as was consumed in guest
658 // L3s, the number of guest entries per shadow page is
659 // SHADOW_L2_PAGETABLE_ENTRIES/2. (Note this is *not*
660 // SHADOW_L3_PAGETABLE_ENTRIES, which in this case is 4...)
661 //
662 *smfn = _mfn(mfn_x(*smfn) +
663 (guest_index / (SHADOW_L2_PAGETABLE_ENTRIES / 2)));
665 // We store PAE L3 shadows in groups of 4, alternating shadows and
666 // pae_l3_bookkeeping structs. So the effective shadow index is
667 // the group_id * 8 + the offset within the group.
668 //
669 guest_index %= (SHADOW_L2_PAGETABLE_ENTRIES / 2);
670 group_id = guest_index / 4;
671 return (group_id * 8) + (guest_index % 4);
672 #else
673 return guest_index;
674 #endif
675 }
677 #endif // GUEST_PAGING_LEVELS >= 3
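/* Illustrative arithmetic (not part of the build): the PAE l3 subshadow
 * layout computed by shadow_l3_index() above.  Each 4-entry guest l3 group
 * takes an 8-entry stride in the shadow page: four shadow l3es followed by
 * 32 bytes of pae_l3_bookkeeping.  The guest index below is made up. */
#if 0
#include <assert.h>
static void pae_l3_index_example(void)
{
    unsigned int guest_index = 10;              /* third group, slot 2 */
    unsigned int group_id    = guest_index / 4;
    unsigned int shadow_idx  = group_id * 8 + guest_index % 4;

    assert(group_id == 2 && shadow_idx == 18);
}
#endif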
679 #if GUEST_PAGING_LEVELS >= 4
681 static inline u32
682 shadow_l4_index(mfn_t *smfn, u32 guest_index)
683 {
684 return guest_index;
685 }
687 #endif // GUEST_PAGING_LEVELS >= 4
690 /**************************************************************************/
691 /* Functions which compute shadow entries from their corresponding guest
692 * entries.
693 *
694 * These are the "heart" of the shadow code.
695 *
696 * There are two sets of these: those that are called on demand faults (read
697 * faults and write faults), and those that are essentially called to
698 * "prefetch" (or propagate) entries from the guest into the shadow. The read
699 * fault and write fault are handled as two separate cases for L1 entries (due
700 * to the _PAGE_DIRTY bit handling), but for L[234], they are grouped together
701 * into the respective demand_fault functions.
702 */
703 // The function below tries to capture all of the flag manipulation for the
704 // demand and propagate functions into one place.
705 //
706 static always_inline u32
707 sh_propagate_flags(struct vcpu *v, mfn_t target_mfn,
708 u32 gflags, guest_l1e_t *guest_entry_ptr, mfn_t gmfn,
709 int mmio, int level, fetch_type_t ft)
710 {
711 #define CHECK(_cond) \
712 do { \
713 if (unlikely(!(_cond))) \
714 { \
715 printk("%s %s %d ASSERTION (%s) FAILED\n", \
716 __func__, __FILE__, __LINE__, #_cond); \
717 domain_crash(d); \
718 } \
719 } while (0);
721 struct domain *d = v->domain;
722 u32 pass_thru_flags;
723 u32 sflags;
725 // XXX -- might want to think about PAT support for HVM guests...
727 #ifndef NDEBUG
728 // MMIO can only occur from L1e's
729 //
730 if ( mmio )
731 CHECK(level == 1);
733 // We should always have a pointer to the guest entry if it's a non-PSE
734 // non-MMIO demand access.
735 if ( ft & FETCH_TYPE_DEMAND )
736 CHECK(guest_entry_ptr || level == 1);
737 #endif
739 // A not-present guest entry has a special signature in the shadow table,
740 // so that we do not have to consult the guest tables multiple times...
741 //
742 if ( unlikely(!(gflags & _PAGE_PRESENT)) )
743 return _PAGE_SHADOW_GUEST_NOT_PRESENT;
745 // Must have a valid target_mfn, unless this is mmio, or unless this is a
746 // prefetch. In the case of a prefetch, an invalid mfn means that we can
747 // not usefully shadow anything, and so we return early.
748 //
749 if ( !valid_mfn(target_mfn) )
750 {
751 CHECK((ft == ft_prefetch) || mmio);
752 if ( !mmio )
753 return 0;
754 }
756 // Set the A and D bits in the guest entry, if we need to.
757 if ( guest_entry_ptr && (ft & FETCH_TYPE_DEMAND) )
758 gflags = guest_set_ad_bits(v, gmfn, guest_entry_ptr, level, ft);
760 // PAE does not allow NX, RW, USER, ACCESSED, or DIRTY bits in its L3e's...
761 //
762 if ( (SHADOW_PAGING_LEVELS == 3) && (level == 3) )
763 pass_thru_flags = _PAGE_PRESENT;
764 else
765 {
766 pass_thru_flags = (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER |
767 _PAGE_RW | _PAGE_PRESENT);
768 if ( guest_supports_nx(v) )
769 pass_thru_flags |= _PAGE_NX_BIT;
770 }
772 // PAE guests can not put NX, RW, USER, ACCESSED, or DIRTY bits into their
773 // L3e's; they are all implied. So we emulate them here.
774 //
775 if ( (GUEST_PAGING_LEVELS == 3) && (level == 3) )
776 gflags = pass_thru_flags;
778 // Propagate bits from the guest to the shadow.
779 // Some of these may be overwritten, below.
780 // Since we know the guest's PRESENT bit is set, we also set the shadow's
781 // SHADOW_PRESENT bit.
782 //
783 sflags = (gflags & pass_thru_flags) | _PAGE_SHADOW_PRESENT;
785 // Copy the guest's RW bit into the SHADOW_RW bit.
786 //
787 if ( gflags & _PAGE_RW )
788 sflags |= _PAGE_SHADOW_RW;
790 // Set the A&D bits for higher level shadows.
791 // Higher level entries do not, strictly speaking, have dirty bits, but
792 // since we use shadow linear tables, each of these entries may, at some
793 // point in time, also serve as a shadow L1 entry.
794 // By setting both the A&D bits in each of these, we eliminate the burden
795 // on the hardware to update these bits on initial accesses.
796 //
797 if ( (level > 1) && !((SHADOW_PAGING_LEVELS == 3) && (level == 3)) )
798 sflags |= _PAGE_ACCESSED | _PAGE_DIRTY;
800 // If the A or D bit has not yet been set in the guest, then we must
801 // prevent the corresponding kind of access.
802 //
803 if ( unlikely(!((GUEST_PAGING_LEVELS == 3) && (level == 3)) &&
804 !(gflags & _PAGE_ACCESSED)) )
805 sflags &= ~_PAGE_PRESENT;
807 /* D bits exist in L1es and PSE L2es */
808 if ( unlikely(((level == 1) ||
809 ((level == 2) &&
810 (gflags & _PAGE_PSE) &&
811 guest_supports_superpages(v)))
812 && !(gflags & _PAGE_DIRTY)) )
813 sflags &= ~_PAGE_RW;
815 // MMIO caching
816 //
817 // MMIO mappings are marked as not present, but we set the SHADOW_MMIO bit
818 // to cache the fact that this entry is in MMIO space.
819 //
820 if ( (level == 1) && mmio )
821 {
822 sflags &= ~(_PAGE_PRESENT);
823 sflags |= _PAGE_SHADOW_MMIO;
824 }
825 else
826 {
827 // shadow_mode_log_dirty support
828 //
829 // Only allow the guest write access to a page a) on a demand fault,
830 // or b) if the page is already marked as dirty.
831 //
832 if ( unlikely((level == 1) &&
833 !(ft & FETCH_TYPE_WRITE) &&
834 shadow_mode_log_dirty(d) &&
835 !sh_mfn_is_dirty(d, target_mfn)) )
836 {
837 sflags &= ~_PAGE_RW;
838 }
840 // protect guest page tables
841 //
842 if ( unlikely((level == 1) &&
843 sh_mfn_is_a_page_table(target_mfn)) )
844 {
845 if ( shadow_mode_trap_reads(d) )
846 {
847 // if we are trapping both reads & writes, then mark this page
848 // as not present...
849 //
850 sflags &= ~_PAGE_PRESENT;
851 }
852 else
853 {
854 // otherwise, just prevent any writes...
855 //
856 sflags &= ~_PAGE_RW;
857 }
858 }
859 }
861 // PV guests in 64-bit mode use two different page tables for user vs
862 // supervisor permissions, making the guest's _PAGE_USER bit irrelevant.
863 // It is always shadowed as present...
864 if ( (GUEST_PAGING_LEVELS == 4) && !hvm_guest(v) )
865 {
866 sflags |= _PAGE_USER;
867 }
869 return sflags;
870 #undef CHECK
871 }
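/* Illustrative sketch (not part of the build): the central A/D gating that
 * sh_propagate_flags() applies after choosing the pass-through flags.  A
 * missing A bit hides the page entirely; a missing D bit (on an l1e or a
 * PSE l2e) hides only write access, so the resulting fault gives us a
 * chance to set the guest bit before granting the access.  "needs_d_bit"
 * is a hypothetical stand-in for the level/_PAGE_PSE checks above. */
#if 0
static unsigned int gate_on_ad_bits(unsigned int sflags, unsigned int gflags,
                                    int needs_d_bit)
{
    if ( !(gflags & _PAGE_ACCESSED) )
        sflags &= ~_PAGE_PRESENT;   /* force a fault so A can be set        */
    if ( needs_d_bit && !(gflags & _PAGE_DIRTY) )
        sflags &= ~_PAGE_RW;        /* force a write fault so D can be set  */
    return sflags;
}
#endif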
873 #if GUEST_PAGING_LEVELS >= 4
874 static void
875 l4e_propagate_from_guest(struct vcpu *v,
876 guest_l4e_t *gl4e,
877 mfn_t gl4mfn,
878 mfn_t sl3mfn,
879 shadow_l4e_t *sl4p,
880 fetch_type_t ft)
881 {
882 u32 gflags = guest_l4e_get_flags(*gl4e);
883 u32 sflags = sh_propagate_flags(v, sl3mfn, gflags, (guest_l1e_t *) gl4e,
884 gl4mfn, 0, 4, ft);
886 *sl4p = shadow_l4e_from_mfn(sl3mfn, sflags);
888 SHADOW_DEBUG(PROPAGATE,
889 "%s gl4e=%" SH_PRI_gpte " sl4e=%" SH_PRI_pte "\n",
890 fetch_type_names[ft], gl4e->l4, sl4p->l4);
891 ASSERT(sflags != -1);
892 }
893 #endif // GUEST_PAGING_LEVELS >= 4
895 #if GUEST_PAGING_LEVELS >= 3
896 static void
897 l3e_propagate_from_guest(struct vcpu *v,
898 guest_l3e_t *gl3e,
899 mfn_t gl3mfn,
900 mfn_t sl2mfn,
901 shadow_l3e_t *sl3p,
902 fetch_type_t ft)
903 {
904 u32 gflags = guest_l3e_get_flags(*gl3e);
905 u32 sflags = sh_propagate_flags(v, sl2mfn, gflags, (guest_l1e_t *) gl3e,
906 gl3mfn, 0, 3, ft);
908 *sl3p = shadow_l3e_from_mfn(sl2mfn, sflags);
910 SHADOW_DEBUG(PROPAGATE,
911 "%s gl3e=%" SH_PRI_gpte " sl3e=%" SH_PRI_pte "\n",
912 fetch_type_names[ft], gl3e->l3, sl3p->l3);
913 ASSERT(sflags != -1);
914 }
915 #endif // GUEST_PAGING_LEVELS >= 3
917 static void
918 l2e_propagate_from_guest(struct vcpu *v,
919 guest_l2e_t *gl2e,
920 mfn_t gl2mfn,
921 mfn_t sl1mfn,
922 shadow_l2e_t *sl2p,
923 fetch_type_t ft)
924 {
925 u32 gflags = guest_l2e_get_flags(*gl2e);
926 u32 sflags = sh_propagate_flags(v, sl1mfn, gflags, (guest_l1e_t *) gl2e,
927 gl2mfn, 0, 2, ft);
929 *sl2p = shadow_l2e_from_mfn(sl1mfn, sflags);
931 SHADOW_DEBUG(PROPAGATE,
932 "%s gl2e=%" SH_PRI_gpte " sl2e=%" SH_PRI_pte "\n",
933 fetch_type_names[ft], gl2e->l2, sl2p->l2);
934 ASSERT(sflags != -1);
935 }
937 static inline int
938 l1e_read_fault(struct vcpu *v, walk_t *gw, mfn_t gmfn, shadow_l1e_t *sl1p,
939 int mmio)
940 /* returns 1 if emulation is required, and 0 otherwise */
941 {
942 struct domain *d = v->domain;
943 u32 gflags = guest_l1e_get_flags(gw->eff_l1e);
944 u32 sflags = sh_propagate_flags(v, gmfn, gflags, gw->l1e, gw->l1mfn,
945 mmio, 1, ft_demand_read);
947 if ( shadow_mode_trap_reads(d) && !mmio && sh_mfn_is_a_page_table(gmfn) )
948 {
949 // emulation required!
950 *sl1p = shadow_l1e_empty();
951 return 1;
952 }
954 *sl1p = shadow_l1e_from_mfn(gmfn, sflags);
956 SHADOW_DEBUG(PROPAGATE,
957 "va=%p eff_gl1e=%" SH_PRI_gpte " sl1e=%" SH_PRI_pte "\n",
958 (void *)gw->va, gw->eff_l1e.l1, sl1p->l1);
960 ASSERT(sflags != -1);
961 return 0;
962 }
964 static inline int
965 l1e_write_fault(struct vcpu *v, walk_t *gw, mfn_t gmfn, shadow_l1e_t *sl1p,
966 int mmio)
967 /* returns 1 if emulation is required, and 0 otherwise */
968 {
969 struct domain *d = v->domain;
970 u32 gflags = guest_l1e_get_flags(gw->eff_l1e);
971 u32 sflags = sh_propagate_flags(v, gmfn, gflags, gw->l1e, gw->l1mfn,
972 mmio, 1, ft_demand_write);
974 sh_mark_dirty(d, gmfn);
976 if ( !mmio && sh_mfn_is_a_page_table(gmfn) )
977 {
978 // emulation required!
979 *sl1p = shadow_l1e_empty();
980 return 1;
981 }
983 *sl1p = shadow_l1e_from_mfn(gmfn, sflags);
985 SHADOW_DEBUG(PROPAGATE,
986 "va=%p eff_gl1e=%" SH_PRI_gpte " sl1e=%" SH_PRI_pte "\n",
987 (void *)gw->va, gw->eff_l1e.l1, sl1p->l1);
989 ASSERT(sflags != -1);
990 return 0;
991 }
993 static inline void
994 l1e_propagate_from_guest(struct vcpu *v, guest_l1e_t gl1e, shadow_l1e_t *sl1p,
995 int mmio)
996 {
997 gfn_t gfn = guest_l1e_get_gfn(gl1e);
998 mfn_t gmfn = (mmio) ? _mfn(gfn_x(gfn)) : vcpu_gfn_to_mfn(v, gfn);
999 u32 gflags = guest_l1e_get_flags(gl1e);
1000 u32 sflags = sh_propagate_flags(v, gmfn, gflags, 0, _mfn(INVALID_MFN),
1001 mmio, 1, ft_prefetch);
1003 *sl1p = shadow_l1e_from_mfn(gmfn, sflags);
1005 SHADOW_DEBUG(PROPAGATE,
1006 "gl1e=%" SH_PRI_gpte " sl1e=%" SH_PRI_pte "\n",
1007 gl1e.l1, sl1p->l1);
1009 ASSERT(sflags != -1);
1010 }
1013 /**************************************************************************/
1014 /* These functions update shadow entries (and do bookkeeping on the shadow
1015 * tables they are in). It is intended that they are the only
1016 * functions which ever write (non-zero) data onto a shadow page.
1018 * They return a set of flags:
1019 * SHADOW_SET_CHANGED -- we actually wrote a new value to the shadow.
1020 * SHADOW_SET_FLUSH -- the caller must cause a TLB flush.
1021 * SHADOW_SET_ERROR -- the input is not a valid entry (for example, if
1022 * shadow_get_page_from_l1e() fails).
1023 * SHADOW_SET_L3PAE_RECOPY -- one or more vcpus need to have their local
1024 * copies of their PAE L3 entries re-copied.
1025 */
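/* Illustrative caller sketch (not part of the build): the shadow_set_l*e()
 * return value is a bitmask, so call sites accumulate the flags and act on
 * them after writing the entry or entries.  The function below is
 * hypothetical; the real call sites are in the fault-handling and validate
 * paths later in this file. */
#if 0
static void act_on_shadow_set_flags(struct vcpu *v, int flags)
{
    ASSERT(!(flags & SHADOW_SET_ERROR));  /* or unwind, depending on caller */
    if ( flags & SHADOW_SET_FLUSH )
        flush_tlb_mask(v->domain->domain_dirty_cpumask);
    /* SHADOW_SET_L3PAE_RECOPY additionally requires the per-vcpu copies of
     * the PAE l3 to be refreshed (see shadow_set_l3e() below). */
}
#endif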
1027 static inline void safe_write_entry(void *dst, void *src)
1028 /* Copy one PTE safely when processors might be running on the
1029 * destination pagetable. This does *not* give safety against
1030 * concurrent writes (that's what the shadow lock is for), just
1031 * stops the hardware picking up partially written entries. */
1032 {
1033 volatile unsigned long *d = dst;
1034 unsigned long *s = src;
1035 ASSERT(!((unsigned long) d & (sizeof (shadow_l1e_t) - 1)));
1036 #if CONFIG_PAGING_LEVELS == 3
1037 /* In PAE mode, pagetable entries are larger
1038 * than machine words, so won't get written atomically. We need to make
1039 * sure any other cpu running on these shadows doesn't see a
1040 * half-written entry. Do this by marking the entry not-present first,
1041 * then writing the high word before the low word. */
1042 BUILD_BUG_ON(sizeof (shadow_l1e_t) != 2 * sizeof (unsigned long));
1043 d[0] = 0;
1044 d[1] = s[1];
1045 d[0] = s[0];
1046 #else
1047 /* In 32-bit and 64-bit, sizeof(pte) == sizeof(ulong) == 1 word,
1048 * which will be an atomic write, since the entry is aligned. */
1049 BUILD_BUG_ON(sizeof (shadow_l1e_t) != sizeof (unsigned long));
1050 *d = *s;
1051 #endif
1052 }
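/* Illustrative sketch (not part of the build): the PAE write ordering used
 * above, on a plain 2 x 32-bit representation.  Because the low word (which
 * holds the PRESENT bit) is cleared first and rewritten last, an observer
 * reading the two words separately sees the old entry, a not-present entry,
 * or the complete new entry -- never a mix of old and new halves. */
#if 0
#include <stdint.h>
static void pae_safe_write_example(volatile uint32_t d[2], const uint32_t s[2])
{
    d[0] = 0;        /* clear low word: entry is now not-present     */
    d[1] = s[1];     /* install the new high word                    */
    d[0] = s[0];     /* install the new low word: entry goes live    */
}
#endif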
1055 static inline void
1056 shadow_write_entries(void *d, void *s, int entries, mfn_t mfn)
1057 /* This function does the actual writes to shadow pages.
1058 * It must not be called directly, since it doesn't do the bookkeeping
1059 * that shadow_set_l*e() functions do. */
1060 {
1061 shadow_l1e_t *dst = d;
1062 shadow_l1e_t *src = s;
1063 void *map = NULL;
1064 int i;
1066 /* Because we mirror access rights at all levels in the shadow, an
1067 * l2 (or higher) entry with the RW bit cleared will leave us with
1068 * no write access through the linear map.
1069 * We detect that by writing to the shadow with copy_to_user() and
1070 * using map_domain_page() to get a writeable mapping if we need to. */
1071 if ( __copy_to_user(d, d, sizeof (unsigned long)) != 0 )
1072 {
1073 perfc_incrc(shadow_linear_map_failed);
1074 map = sh_map_domain_page(mfn);
1075 ASSERT(map != NULL);
1076 dst = map + ((unsigned long)dst & (PAGE_SIZE - 1));
1077 }
1080 for ( i = 0; i < entries; i++ )
1081 safe_write_entry(dst++, src++);
1083 if ( map != NULL ) sh_unmap_domain_page(map);
1085 /* XXX TODO:
1086 * Update min/max field in page_info struct of this mfn */
1087 }
1089 static inline int
1090 perms_strictly_increased(u32 old_flags, u32 new_flags)
1091 /* Given the flags of two entries, are the new flags a strict
1092 * increase in rights over the old ones? */
1093 {
1094 u32 of = old_flags & (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX);
1095 u32 nf = new_flags & (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX);
1096 /* Flip the NX bit, since it's the only one that decreases rights;
1097 * we calculate as if it were an "X" bit. */
1098 of ^= _PAGE_NX_BIT;
1099 nf ^= _PAGE_NX_BIT;
1100 /* If the changed bits are all set in the new flags, then rights strictly
1101 * increased between old and new. */
1102 return ((of | (of ^ nf)) == nf);
1103 }
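/* Illustrative check (not part of the build): the bit trick above is a
 * subset test -- it is true exactly when every right present in the old
 * flags is also present in the new ones.  NX is inverted first because
 * setting NX removes a right.  The flag values below are made up. */
#if 0
#include <assert.h>
static void perms_subset_example(void)
{
    unsigned int of, nf;

    of = 0x1; nf = 0x3;                 /* PRESENT -> PRESENT|RW: increase  */
    assert((of | (of ^ nf)) == nf);

    of = 0x3; nf = 0x5;                 /* RW lost, USER gained: not a      */
    assert((of | (of ^ nf)) != nf);     /* strict increase                  */
}
#endif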
1105 static int inline
1106 shadow_get_page_from_l1e(shadow_l1e_t sl1e, struct domain *d)
1108 int res;
1109 mfn_t mfn;
1110 struct domain *owner;
1111 shadow_l1e_t sanitized_sl1e =
1112 shadow_l1e_remove_flags(sl1e, _PAGE_SHADOW_RW | _PAGE_SHADOW_PRESENT);
1114 //ASSERT(shadow_l1e_get_flags(sl1e) & _PAGE_PRESENT);
1115 //ASSERT((shadow_l1e_get_flags(sl1e) & L1_DISALLOW_MASK) == 0);
1117 if ( !shadow_mode_refcounts(d) )
1118 return 1;
1120 res = get_page_from_l1e(sanitized_sl1e, d);
1122 // If a privileged domain is attempting to install a map of a page it does
1123 // not own, we let it succeed anyway.
1124 //
1125 if ( unlikely(!res) &&
1126 IS_PRIV(d) &&
1127 !shadow_mode_translate(d) &&
1128 valid_mfn(mfn = shadow_l1e_get_mfn(sl1e)) &&
1129 (owner = page_get_owner(mfn_to_page(mfn))) &&
1130 (d != owner) )
1132 res = get_page_from_l1e(sanitized_sl1e, owner);
1133 SHADOW_PRINTK("privileged domain %d installs map of mfn %05lx "
1134 "which is owned by domain %d: %s\n",
1135 d->domain_id, mfn_x(mfn), owner->domain_id,
1136 res ? "success" : "failed");
1139 if ( unlikely(!res) )
1141 perfc_incrc(shadow_get_page_fail);
1142 SHADOW_PRINTK("failed: l1e=" SH_PRI_pte "\n");
1145 return res;
1148 static void inline
1149 shadow_put_page_from_l1e(shadow_l1e_t sl1e, struct domain *d)
1151 if ( !shadow_mode_refcounts(d) )
1152 return;
1154 put_page_from_l1e(sl1e, d);
1157 #if GUEST_PAGING_LEVELS >= 4
1158 static int shadow_set_l4e(struct vcpu *v,
1159 shadow_l4e_t *sl4e,
1160 shadow_l4e_t new_sl4e,
1161 mfn_t sl4mfn)
1163 int flags = 0;
1164 shadow_l4e_t old_sl4e;
1165 paddr_t paddr;
1166 ASSERT(sl4e != NULL);
1167 old_sl4e = *sl4e;
1169 if ( old_sl4e.l4 == new_sl4e.l4 ) return 0; /* Nothing to do */
1171 paddr = ((((paddr_t)mfn_x(sl4mfn)) << PAGE_SHIFT)
1172 | (((unsigned long)sl4e) & ~PAGE_MASK));
1174 if ( shadow_l4e_get_flags(new_sl4e) & _PAGE_PRESENT )
1176 /* About to install a new reference */
1177 sh_get_ref(shadow_l4e_get_mfn(new_sl4e), paddr);
1180 /* Write the new entry */
1181 shadow_write_entries(sl4e, &new_sl4e, 1, sl4mfn);
1182 flags |= SHADOW_SET_CHANGED;
1184 if ( shadow_l4e_get_flags(old_sl4e) & _PAGE_PRESENT )
1186 /* We lost a reference to an old mfn. */
1187 mfn_t osl3mfn = shadow_l4e_get_mfn(old_sl4e);
1188 if ( (mfn_x(osl3mfn) != mfn_x(shadow_l4e_get_mfn(new_sl4e)))
1189 || !perms_strictly_increased(shadow_l4e_get_flags(old_sl4e),
1190 shadow_l4e_get_flags(new_sl4e)) )
1192 flags |= SHADOW_SET_FLUSH;
1194 sh_put_ref(v, osl3mfn, paddr);
1196 return flags;
1198 #endif /* GUEST_PAGING_LEVELS >= 4 */
1200 #if GUEST_PAGING_LEVELS >= 3
1201 static int shadow_set_l3e(struct vcpu *v,
1202 shadow_l3e_t *sl3e,
1203 shadow_l3e_t new_sl3e,
1204 mfn_t sl3mfn)
1206 int flags = 0;
1207 shadow_l3e_t old_sl3e;
1208 paddr_t paddr;
1209 ASSERT(sl3e != NULL);
1210 old_sl3e = *sl3e;
1212 if ( old_sl3e.l3 == new_sl3e.l3 ) return 0; /* Nothing to do */
1214 paddr = ((((paddr_t)mfn_x(sl3mfn)) << PAGE_SHIFT)
1215 | (((unsigned long)sl3e) & ~PAGE_MASK));
1217 if ( shadow_l3e_get_flags(new_sl3e) & _PAGE_PRESENT )
1219 /* About to install a new reference */
1220 sh_get_ref(shadow_l3e_get_mfn(new_sl3e), paddr);
1223 /* Write the new entry */
1224 shadow_write_entries(sl3e, &new_sl3e, 1, sl3mfn);
1225 flags |= SHADOW_SET_CHANGED;
1227 #if GUEST_PAGING_LEVELS == 3
1228 /* We wrote a guest l3e in a PAE pagetable. This table is copied in
1229 * the linear pagetable entries of its l2s, and may also be copied
1230 * to a low memory location to make it fit in CR3. Report that we
1231 * need to resync those copies (we can't wait for the guest to flush
1232 * the TLB because it might be an increase in rights). */
1234 struct vcpu *vcpu;
1236 struct pae_l3_bookkeeping *info = sl3p_to_info(sl3e);
1237 for_each_vcpu(v->domain, vcpu)
1239 if (info->vcpus & (1 << vcpu->vcpu_id))
1241 // Remember that this flip/update needs to occur.
1242 vcpu->arch.shadow.pae_flip_pending = 1;
1243 flags |= SHADOW_SET_L3PAE_RECOPY;
1247 #endif
1249 if ( shadow_l3e_get_flags(old_sl3e) & _PAGE_PRESENT )
1251 /* We lost a reference to an old mfn. */
1252 mfn_t osl2mfn = shadow_l3e_get_mfn(old_sl3e);
1253 if ( (mfn_x(osl2mfn) != mfn_x(shadow_l3e_get_mfn(new_sl3e))) ||
1254 !perms_strictly_increased(shadow_l3e_get_flags(old_sl3e),
1255 shadow_l3e_get_flags(new_sl3e)) )
1257 flags |= SHADOW_SET_FLUSH;
1259 sh_put_ref(v, osl2mfn, paddr);
1261 return flags;
1263 #endif /* GUEST_PAGING_LEVELS >= 3 */
1265 static int shadow_set_l2e(struct vcpu *v,
1266 shadow_l2e_t *sl2e,
1267 shadow_l2e_t new_sl2e,
1268 mfn_t sl2mfn)
1270 int flags = 0;
1271 shadow_l2e_t old_sl2e;
1272 paddr_t paddr;
1274 #if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
1275 /* In 2-on-3 we work with pairs of l2es pointing at two-page
1276 * shadows. Reference counting and up-pointers track from the first
1277 * page of the shadow to the first l2e, so make sure that we're
1278 * working with those:
1279 * Align the pointer down so it's pointing at the first of the pair */
1280 sl2e = (shadow_l2e_t *)((unsigned long)sl2e & ~(sizeof(shadow_l2e_t)));
1281 /* Align the mfn of the shadow entry too */
1282 new_sl2e.l2 &= ~(1<<PAGE_SHIFT);
1283 #endif
1285 ASSERT(sl2e != NULL);
1286 old_sl2e = *sl2e;
1288 if ( old_sl2e.l2 == new_sl2e.l2 ) return 0; /* Nothing to do */
1290 paddr = ((((paddr_t)mfn_x(sl2mfn)) << PAGE_SHIFT)
1291 | (((unsigned long)sl2e) & ~PAGE_MASK));
1293 if ( shadow_l2e_get_flags(new_sl2e) & _PAGE_PRESENT )
1295 /* About to install a new reference */
1296 sh_get_ref(shadow_l2e_get_mfn(new_sl2e), paddr);
1299 /* Write the new entry */
1300 #if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
1302 shadow_l2e_t pair[2] = { new_sl2e, new_sl2e };
1303 /* The l1 shadow is two pages long and needs to be pointed to by
1304 * two adjacent l1es. The pair have the same flags, but point
1305 * at odd and even MFNs */
1306 ASSERT(!(pair[0].l2 & (1<<PAGE_SHIFT)));
1307 pair[1].l2 |= (1<<PAGE_SHIFT);
1308 shadow_write_entries(sl2e, &pair, 2, sl2mfn);
1310 #else /* normal case */
1311 shadow_write_entries(sl2e, &new_sl2e, 1, sl2mfn);
1312 #endif
1313 flags |= SHADOW_SET_CHANGED;
1315 if ( shadow_l2e_get_flags(old_sl2e) & _PAGE_PRESENT )
1317 /* We lost a reference to an old mfn. */
1318 mfn_t osl1mfn = shadow_l2e_get_mfn(old_sl2e);
1319 if ( (mfn_x(osl1mfn) != mfn_x(shadow_l2e_get_mfn(new_sl2e))) ||
1320 !perms_strictly_increased(shadow_l2e_get_flags(old_sl2e),
1321 shadow_l2e_get_flags(new_sl2e)) )
1323 flags |= SHADOW_SET_FLUSH;
1325 sh_put_ref(v, osl1mfn, paddr);
1327 return flags;
1330 static int shadow_set_l1e(struct vcpu *v,
1331 shadow_l1e_t *sl1e,
1332 shadow_l1e_t new_sl1e,
1333 mfn_t sl1mfn)
1335 int flags = 0;
1336 struct domain *d = v->domain;
1337 shadow_l1e_t old_sl1e;
1338 ASSERT(sl1e != NULL);
1340 old_sl1e = *sl1e;
1342 if ( old_sl1e.l1 == new_sl1e.l1 ) return 0; /* Nothing to do */
1344 if ( shadow_l1e_get_flags(new_sl1e) & _PAGE_PRESENT )
1346 /* About to install a new reference */
1347 if ( shadow_mode_refcounts(d) ) {
1348 if ( shadow_get_page_from_l1e(new_sl1e, d) == 0 )
1350 /* Doesn't look like a pagetable. */
1351 flags |= SHADOW_SET_ERROR;
1352 new_sl1e = shadow_l1e_empty();
1357 /* Write the new entry */
1358 shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
1359 flags |= SHADOW_SET_CHANGED;
1361 if ( shadow_l1e_get_flags(old_sl1e) & _PAGE_PRESENT )
1363 /* We lost a reference to an old mfn. */
1364 /* N.B. Unlike higher-level sets, never need an extra flush
1365 * when writing an l1e. Because it points to the same guest frame
1366 * as the guest l1e did, it's the guest's responsibility to
1367 * trigger a flush later. */
1368 if ( shadow_mode_refcounts(d) )
1370 shadow_put_page_from_l1e(old_sl1e, d);
1373 return flags;
1377 /**************************************************************************/
1378 /* These functions take a vcpu and a virtual address, and return a pointer
1379 * to the appropriate level N entry from the shadow tables.
1380 * If the necessary tables are not present in the shadow, they return NULL. */
1382 /* N.B. The use of GUEST_PAGING_LEVELS here is correct. If the shadow has
1383 * more levels than the guest, the upper levels are always fixed and do not
1384 * reflect any information from the guest, so we do not use these functions
1385 * to access them. */
1387 #if GUEST_PAGING_LEVELS >= 4
1388 static shadow_l4e_t *
1389 shadow_get_l4e(struct vcpu *v, unsigned long va)
1391 /* Reading the top level table is always valid. */
1392 return sh_linear_l4_table(v) + shadow_l4_linear_offset(va);
1394 #endif /* GUEST_PAGING_LEVELS >= 4 */
1397 #if GUEST_PAGING_LEVELS >= 3
1398 static shadow_l3e_t *
1399 shadow_get_l3e(struct vcpu *v, unsigned long va)
1401 #if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
1402 /* Get the l4 */
1403 shadow_l4e_t *sl4e = shadow_get_l4e(v, va);
1404 ASSERT(sl4e != NULL);
1405 if ( !(shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT) )
1406 return NULL;
1407 ASSERT(valid_mfn(shadow_l4e_get_mfn(*sl4e)));
1408 /* l4 was present; OK to get the l3 */
1409 return sh_linear_l3_table(v) + shadow_l3_linear_offset(va);
1410 #else /* PAE... */
1411 /* Top level is always mapped */
1412 ASSERT(v->arch.shadow_vtable);
1413 return ((shadow_l3e_t *)v->arch.shadow_vtable) + shadow_l3_linear_offset(va);
1414 #endif
1416 #endif /* GUEST_PAGING_LEVELS >= 3 */
1419 static shadow_l2e_t *
1420 shadow_get_l2e(struct vcpu *v, unsigned long va)
1422 #if GUEST_PAGING_LEVELS >= 3 /* 64bit/PAE... */
1423 /* Get the l3 */
1424 shadow_l3e_t *sl3e = shadow_get_l3e(v, va);
1425 if ( sl3e == NULL || !(shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) )
1426 return NULL;
1427 ASSERT(valid_mfn(shadow_l3e_get_mfn(*sl3e)));
1428 /* l3 was present; OK to get the l2 */
1429 #endif
1430 return sh_linear_l2_table(v) + shadow_l2_linear_offset(va);
1434 #if 0 // avoid the compiler warning for now...
1436 static shadow_l1e_t *
1437 shadow_get_l1e(struct vcpu *v, unsigned long va)
1439 /* Get the l2 */
1440 shadow_l2e_t *sl2e = shadow_get_l2e(v, va);
1441 if ( sl2e == NULL || !(shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT) )
1442 return NULL;
1443 ASSERT(valid_mfn(shadow_l2e_get_mfn(*sl2e)));
1444 /* l2 was present; OK to get the l1 */
1445 return sh_linear_l1_table(v) + shadow_l1_linear_offset(va);
1448 #endif
1451 /**************************************************************************/
1452 /* Macros to walk pagetables. These take the shadow of a pagetable and
1453 * walk every "interesting" entry. That is, they don't touch Xen mappings,
1454 * and for 32-bit l2s shadowed onto PAE or 64-bit, they only touch every
1455 * second entry (since pairs of entries are managed together). For multi-page
1456 * shadows they walk all pages.
1458 * Arguments are an MFN, the variable to point to each entry, a variable
1459 * to indicate that we are done (we will shortcut to the end of the scan
1460 * when _done != 0), a variable to indicate that we should avoid Xen mappings,
1461 * and the code.
1463 * WARNING: These macros have side-effects. They change the values of both
1464 * the pointer and the MFN. */
1466 static inline void increment_ptr_to_guest_entry(void *ptr)
1468 if ( ptr )
1470 guest_l1e_t **entry = ptr;
1471 (*entry)++;
1475 /* All kinds of l1: touch all entries */
1476 #define _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code) \
1477 do { \
1478 int _i; \
1479 shadow_l1e_t *_sp = map_shadow_page((_sl1mfn)); \
1480 ASSERT((mfn_to_page(_sl1mfn)->count_info & PGC_SH_type_mask) \
1481 == PGC_SH_l1_shadow \
1482 || (mfn_to_page(_sl1mfn)->count_info & PGC_SH_type_mask) \
1483 == PGC_SH_fl1_shadow); \
1484 for ( _i = 0; _i < SHADOW_L1_PAGETABLE_ENTRIES; _i++ ) \
1485 { \
1486 (_sl1e) = _sp + _i; \
1487 if ( shadow_l1e_get_flags(*(_sl1e)) & _PAGE_PRESENT ) \
1488 {_code} \
1489 if ( _done ) break; \
1490 increment_ptr_to_guest_entry(_gl1p); \
1491 } \
1492 unmap_shadow_page(_sp); \
1493 } while (0)
1495 /* 32-bit l1, on PAE or 64-bit shadows: need to walk both pages of shadow */
1496 #if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
1497 #define SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code) \
1498 do { \
1499 int __done = 0; \
1500 _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, \
1501 ({ (__done = _done); }), _code); \
1502 _sl1mfn = _mfn(mfn_x(_sl1mfn) + 1); \
1503 if ( !__done ) \
1504 _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, \
1505 ({ (__done = _done); }), _code); \
1506 } while (0)
1507 #else /* Everything else; l1 shadows are only one page */
1508 #define SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code) \
1509 _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code)
1510 #endif
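/* Illustrative use (not part of the build): a typical walk over the present
 * entries of an l1 shadow.  The macro advances _sl1e itself (and, for the
 * two-page 2-on-3 shadows, the mfn), and the "done" expression can cut the
 * walk short.  This particular loop body is hypothetical; the real uses are
 * in the validate, audit and destroy paths later in this file. */
#if 0
static int count_present_sl1es_example(mfn_t sl1mfn)
{
    shadow_l1e_t *sl1e;
    int done = 0, count = 0;

    SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0 /* no guest ptr */, done,
    {
        count++;                /* runs only for _PAGE_PRESENT entries */
    });

    return count;
}
#endif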
1513 #if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
1515 /* 32-bit l2 on PAE/64: four pages, touch every second entry, and avoid Xen */
1516 #define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _xen, _code) \
1517 do { \
1518 int _i, _j, __done = 0; \
1519 ASSERT((mfn_to_page(_sl2mfn)->count_info & PGC_SH_type_mask) \
1520 == PGC_SH_l2_32_shadow); \
1521 for ( _j = 0; _j < 4 && !__done; _j++ ) \
1522 { \
1523 shadow_l2e_t *_sp = map_shadow_page(_sl2mfn); \
1524 for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i += 2 ) \
1525 if ( (!(_xen)) \
1526 || ((_j * SHADOW_L2_PAGETABLE_ENTRIES) + _i) \
1527 < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT) ) \
1528 { \
1529 (_sl2e) = _sp + _i; \
1530 if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT ) \
1531 {_code} \
1532 if ( (__done = (_done)) ) break; \
1533 increment_ptr_to_guest_entry(_gl2p); \
1534 } \
1535 unmap_shadow_page(_sp); \
1536 _sl2mfn = _mfn(mfn_x(_sl2mfn) + 1); \
1537 } \
1538 } while (0)
1540 #elif GUEST_PAGING_LEVELS == 2
1542 /* 32-bit on 32-bit: avoid Xen entries */
1543 #define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _xen, _code) \
1544 do { \
1545 int _i; \
1546 shadow_l2e_t *_sp = map_shadow_page((_sl2mfn)); \
1547 ASSERT((mfn_to_page(_sl2mfn)->count_info & PGC_SH_type_mask) \
1548 == PGC_SH_l2_32_shadow); \
1549 for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ ) \
1550 if ( (!(_xen)) \
1551 || \
1552 (_i < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT)) ) \
1553 { \
1554 (_sl2e) = _sp + _i; \
1555 if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT ) \
1556 {_code} \
1557 if ( _done ) break; \
1558 increment_ptr_to_guest_entry(_gl2p); \
1559 } \
1560 unmap_shadow_page(_sp); \
1561 } while (0)
1563 #elif GUEST_PAGING_LEVELS == 3
1565 /* PAE: if it's an l2h, don't touch Xen mappings */
1566 #define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _xen, _code) \
1567 do { \
1568 int _i; \
1569 shadow_l2e_t *_sp = map_shadow_page((_sl2mfn)); \
1570 ASSERT((mfn_to_page(_sl2mfn)->count_info & PGC_SH_type_mask) \
1571 == PGC_SH_l2_pae_shadow \
1572 || (mfn_to_page(_sl2mfn)->count_info & PGC_SH_type_mask) \
1573 == PGC_SH_l2h_pae_shadow); \
1574 for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ ) \
1575 if ( (!(_xen)) \
1576 || ((mfn_to_page(_sl2mfn)->count_info & PGC_SH_type_mask) \
1577 != PGC_SH_l2h_pae_shadow) \
1578 || ((_i + (3 * SHADOW_L2_PAGETABLE_ENTRIES)) \
1579 < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT)) ) \
1580 { \
1581 (_sl2e) = _sp + _i; \
1582 if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT ) \
1583 {_code} \
1584 if ( _done ) break; \
1585 increment_ptr_to_guest_entry(_gl2p); \
1586 } \
1587 unmap_shadow_page(_sp); \
1588 } while (0)
1590 #else
1592 /* 64-bit l2: touch all entries */
1593 #define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _xen, _code) \
1594 do { \
1595 int _i; \
1596 shadow_l2e_t *_sp = map_shadow_page((_sl2mfn)); \
1597 ASSERT((mfn_to_page(_sl2mfn)->count_info & PGC_SH_type_mask) \
1598 == PGC_SH_l2_64_shadow); \
1599 for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ ) \
1600 { \
1601 (_sl2e) = _sp + _i; \
1602 if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT ) \
1603 {_code} \
1604 if ( _done ) break; \
1605 increment_ptr_to_guest_entry(_gl2p); \
1606 } \
1607 unmap_shadow_page(_sp); \
1608 } while (0)
1610 #endif /* different kinds of l2 */
1612 #if GUEST_PAGING_LEVELS == 3
1614 /* PAE l3 subshadow: touch all entries (FOREACH_L2E will find Xen l2es). */
1615 #define SHADOW_FOREACH_L3E_SUB(_sl3e, _gl3p, _done, _code) \
1616 do { \
1617 int _i; \
1618 for ( _i = 0; _i < 4; _i++ ) \
1619 { \
1620 if ( shadow_l3e_get_flags(*(_sl3e)) & _PAGE_PRESENT ) \
1621 {_code} \
1622 if ( _done ) break; \
1623 _sl3e++; \
1624 increment_ptr_to_guest_entry(_gl3p); \
1625 } \
1626 } while (0)
1628 /* PAE l3 full shadow: call subshadow walk on all valid l3 subshadows */
1629 #define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code) \
1630 do { \
1631 int _i, _j, _k, __done = 0; \
1632 ASSERT((mfn_to_page(_sl3mfn)->count_info & PGC_SH_type_mask) \
1633 == PGC_SH_l3_pae_shadow); \
1634 /* The subshadows are split, 64 on each page of the shadow */ \
1635 for ( _j = 0; _j < 2 && !__done; _j++ ) \
1636 { \
1637 void *_sp = sh_map_domain_page(_sl3mfn); \
1638 for ( _i = 0; _i < 64; _i++ ) \
1639 { \
1640 /* Every second 32-byte region is a bookkeeping entry */ \
1641 _sl3e = (shadow_l3e_t *)(_sp + (64 * _i)); \
1642 if ( (sl3p_to_info(_sl3e))->refcount > 0 ) \
1643 SHADOW_FOREACH_L3E_SUB(_sl3e, _gl3p, \
1644 ({ __done = (_done); __done; }), \
1645 _code); \
1646 else \
1647 for ( _k = 0 ; _k < 4 ; _k++ ) \
1648 increment_ptr_to_guest_entry(_gl3p); \
1649 if ( __done ) break; \
1650 } \
1651 sh_unmap_domain_page(_sp); \
1652 _sl3mfn = _mfn(mfn_x(_sl3mfn) + 1); \
1653 } \
1654 } while (0)
1656 #elif GUEST_PAGING_LEVELS == 4
1658 /* 64-bit l3: touch all entries */
1659 #define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code) \
1660 do { \
1661 int _i; \
1662 shadow_l3e_t *_sp = map_shadow_page((_sl3mfn)); \
1663 ASSERT((mfn_to_page(_sl3mfn)->count_info & PGC_SH_type_mask) \
1664 == PGC_SH_l3_64_shadow); \
1665 for ( _i = 0; _i < SHADOW_L3_PAGETABLE_ENTRIES; _i++ ) \
1666 { \
1667 (_sl3e) = _sp + _i; \
1668 if ( shadow_l3e_get_flags(*(_sl3e)) & _PAGE_PRESENT ) \
1669 {_code} \
1670 if ( _done ) break; \
1671 increment_ptr_to_guest_entry(_gl3p); \
1672 } \
1673 unmap_shadow_page(_sp); \
1674 } while (0)
1676 /* 64-bit l4: avoid Xen mappings */
1677 #define SHADOW_FOREACH_L4E(_sl4mfn, _sl4e, _gl4p, _done, _xen, _code) \
1678 do { \
1679 int _i; \
1680 shadow_l4e_t *_sp = map_shadow_page((_sl4mfn)); \
1681 ASSERT((mfn_to_page(_sl4mfn)->count_info & PGC_SH_type_mask) \
1682 == PGC_SH_l4_64_shadow); \
1683 for ( _i = 0; _i < SHADOW_L4_PAGETABLE_ENTRIES; _i++ ) \
1684 { \
1685 if ( (!(_xen)) || is_guest_l4_slot(_i) ) \
1686 { \
1687 (_sl4e) = _sp + _i; \
1688 if ( shadow_l4e_get_flags(*(_sl4e)) & _PAGE_PRESENT ) \
1689 {_code} \
1690 if ( _done ) break; \
1691 } \
1692 increment_ptr_to_guest_entry(_gl4p); \
1693 } \
1694 unmap_shadow_page(_sp); \
1695 } while (0)
1697 #endif
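/* The FOREACH macros above take an arbitrary brace-enclosed block as their
 * _code argument and run it once per present entry, exactly as the
 * destructors and unhooking functions below do.  A minimal sketch of a
 * walker in the same style (sh_count_present_l2es is a hypothetical helper,
 * not part of this file; sl2mfn must be an l2 shadow of the current
 * configuration): */
#if 0
static int sh_count_present_l2es(mfn_t sl2mfn)
{
    shadow_l2e_t *sl2e;
    int count = 0;
    /* _gl2p == 0: no guest table walked alongside; _done == 0: never stop
     * early; _xen == 0: no Xen-reserved slots need to be skipped. */
    SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, 0, { count++; });
    return count;
}
#endif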
1701 /**************************************************************************/
1702 /* Functions to install Xen mappings and linear mappings in shadow pages */
1704 static mfn_t sh_make_shadow(struct vcpu *v, mfn_t gmfn, u32 shadow_type);
1706 // XXX -- this function should probably be moved to shadow-common.c, but that
1707 // probably wants to wait until the shadow types have been moved from
1708 // shadow-types.h to shadow-private.h
1709 //
1710 #if CONFIG_PAGING_LEVELS == 4 && GUEST_PAGING_LEVELS == 4
1711 void sh_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn)
1713 struct domain *d = v->domain;
1714 shadow_l4e_t *sl4e;
1716 sl4e = sh_map_domain_page(sl4mfn);
1717 ASSERT(sl4e != NULL);
1718 ASSERT(sizeof (l4_pgentry_t) == sizeof (shadow_l4e_t));
1720 /* Copy the common Xen mappings from the idle domain */
1721 memcpy(&sl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
1722 &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
1723 ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
1725 /* Install the per-domain mappings for this domain */
1726 sl4e[shadow_l4_table_offset(PERDOMAIN_VIRT_START)] =
1727 shadow_l4e_from_mfn(page_to_mfn(virt_to_page(d->arch.mm_perdomain_l3)),
1728 __PAGE_HYPERVISOR);
1730 /* Linear mapping */
1731 sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
1732 shadow_l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR);
1734 if ( shadow_mode_translate(v->domain) && !shadow_mode_external(v->domain) )
1736 // linear tables may not be used with translated PV guests
1737 sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
1738 shadow_l4e_empty();
1740 else
1742 sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
1743 shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR);
1746 if ( shadow_mode_translate(v->domain) )
1748 /* install domain-specific P2M table */
1749 sl4e[shadow_l4_table_offset(RO_MPT_VIRT_START)] =
1750 shadow_l4e_from_mfn(pagetable_get_mfn(d->arch.phys_table),
1751 __PAGE_HYPERVISOR);
1754 sh_unmap_domain_page(sl4e);
1756 #endif
1758 #if CONFIG_PAGING_LEVELS == 3 && GUEST_PAGING_LEVELS == 3
1759 // For 3-on-3 PV guests, we need to make sure the xen mappings are in
1760 // place, which means that we need to populate the l2h entry in the l3
1761 // table.
1763 void sh_install_xen_entries_in_l2h(struct vcpu *v,
1764 mfn_t sl2hmfn)
1766 struct domain *d = v->domain;
1767 shadow_l2e_t *sl2e;
1768 int i;
1770 sl2e = sh_map_domain_page(sl2hmfn);
1771 ASSERT(sl2e != NULL);
1772 ASSERT(sizeof (l2_pgentry_t) == sizeof (shadow_l2e_t));
1774 /* Copy the common Xen mappings from the idle domain */
1775 memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
1776 &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
1777 L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
1779 /* Install the per-domain mappings for this domain */
1780 for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
1781 sl2e[shadow_l2_table_offset(PERDOMAIN_VIRT_START) + i] =
1782 shadow_l2e_from_mfn(
1783 page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i),
1784 __PAGE_HYPERVISOR);
1786 /* We don't set up a linear mapping here because we can't until this
1787 * l2h is installed in an l3e. sh_update_linear_entries() handles
1788 * the linear mappings when the l3 is loaded. We zero them here, just as
1789 * a safety measure.
1790 */
1791 for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
1792 sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START) + i] =
1793 shadow_l2e_empty();
1794 for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
1795 sl2e[shadow_l2_table_offset(SH_LINEAR_PT_VIRT_START) + i] =
1796 shadow_l2e_empty();
1798 if ( shadow_mode_translate(d) )
1800 /* Install the domain-specific p2m table */
1801 l3_pgentry_t *p2m;
1802 ASSERT(pagetable_get_pfn(d->arch.phys_table) != 0);
1803 p2m = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
1804 for ( i = 0; i < MACHPHYS_MBYTES>>1; i++ )
1806 sl2e[shadow_l2_table_offset(RO_MPT_VIRT_START) + i] =
1807 (l3e_get_flags(p2m[i]) & _PAGE_PRESENT)
1808 ? shadow_l2e_from_mfn(_mfn(l3e_get_pfn(p2m[i])),
1809 __PAGE_HYPERVISOR)
1810 : shadow_l2e_empty();
1812 sh_unmap_domain_page(p2m);
1815 sh_unmap_domain_page(sl2e);
1818 void sh_install_xen_entries_in_l3(struct vcpu *v, mfn_t gl3mfn, mfn_t sl3mfn)
1820 shadow_l3e_t *sl3e;
1821 guest_l3e_t *gl3e = v->arch.guest_vtable;
1822 shadow_l3e_t new_sl3e;
1823 gfn_t l2gfn;
1824 mfn_t l2gmfn, l2smfn;
1825 int r;
1827 ASSERT(!shadow_mode_external(v->domain));
1828 ASSERT(guest_l3e_get_flags(gl3e[3]) & _PAGE_PRESENT);
1829 l2gfn = guest_l3e_get_gfn(gl3e[3]);
1830 l2gmfn = sh_gfn_to_mfn(v->domain, gfn_x(l2gfn));
1831 l2smfn = get_shadow_status(v, l2gmfn, PGC_SH_l2h_shadow);
1832 if ( !valid_mfn(l2smfn) )
1834 /* must remove write access to this page before shadowing it */
1835 // XXX -- should check to see whether this is better with level==0 or
1836 // level==2...
1837 if ( shadow_remove_write_access(v, l2gmfn, 2, 0xc0000000ul) != 0 )
1838 flush_tlb_mask(v->domain->domain_dirty_cpumask);
1840 l2smfn = sh_make_shadow(v, l2gmfn, PGC_SH_l2h_shadow);
1842 l3e_propagate_from_guest(v, &gl3e[3], gl3mfn, l2smfn, &new_sl3e,
1843 ft_prefetch);
1844 sl3e = sh_map_domain_page(sl3mfn);
1845 r = shadow_set_l3e(v, &sl3e[3], new_sl3e, sl3mfn);
1846 sh_unmap_domain_page(sl3e);
1848 #endif
1851 #if CONFIG_PAGING_LEVELS == 2 && GUEST_PAGING_LEVELS == 2
1852 void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn)
1854 struct domain *d = v->domain;
1855 shadow_l2e_t *sl2e;
1856 int i;
1858 sl2e = sh_map_domain_page(sl2mfn);
1859 ASSERT(sl2e != NULL);
1860 ASSERT(sizeof (l2_pgentry_t) == sizeof (shadow_l2e_t));
1862 /* Copy the common Xen mappings from the idle domain */
1863 memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT],
1864 &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT],
1865 L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
1867 /* Install the per-domain mappings for this domain */
1868 for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
1869 sl2e[shadow_l2_table_offset(PERDOMAIN_VIRT_START) + i] =
1870 shadow_l2e_from_mfn(
1871 page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i),
1872 __PAGE_HYPERVISOR);
1874 /* Linear mapping */
1875 sl2e[shadow_l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
1876 shadow_l2e_from_mfn(sl2mfn, __PAGE_HYPERVISOR);
1878 if ( shadow_mode_translate(v->domain) && !shadow_mode_external(v->domain) )
1880 // linear tables may not be used with translated PV guests
1881 sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START)] =
1882 shadow_l2e_empty();
1884 else
1886 sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START)] =
1887 shadow_l2e_from_mfn(gl2mfn, __PAGE_HYPERVISOR);
1890 if ( shadow_mode_translate(d) )
1892 /* install domain-specific P2M table */
1893 sl2e[shadow_l2_table_offset(RO_MPT_VIRT_START)] =
1894 shadow_l2e_from_mfn(pagetable_get_mfn(d->arch.phys_table),
1895 __PAGE_HYPERVISOR);
1898 sh_unmap_domain_page(sl2e);
1900 #endif
1906 /**************************************************************************/
1907 /* Create a shadow of a given guest page.
1908 */
1909 static mfn_t
1910 sh_make_shadow(struct vcpu *v, mfn_t gmfn, u32 shadow_type)
1912 mfn_t smfn = shadow_alloc(v->domain, shadow_type, mfn_x(gmfn));
1913 SHADOW_DEBUG(MAKE_SHADOW, "(%05lx, %u)=>%05lx\n",
1914 mfn_x(gmfn), shadow_type, mfn_x(smfn));
1916 if ( shadow_type != PGC_SH_guest_root_type )
1917 /* Lower-level shadow, not yet linked from a higher level */
1918 mfn_to_page(smfn)->up = 0;
1920 // Create the Xen mappings...
1921 if ( !shadow_mode_external(v->domain) )
1923 switch (shadow_type)
1925 #if CONFIG_PAGING_LEVELS == 4 && GUEST_PAGING_LEVELS == 4
1926 case PGC_SH_l4_shadow:
1927 sh_install_xen_entries_in_l4(v, gmfn, smfn); break;
1928 #endif
1929 #if CONFIG_PAGING_LEVELS == 3 && GUEST_PAGING_LEVELS == 3
1930 case PGC_SH_l3_shadow:
1931 sh_install_xen_entries_in_l3(v, gmfn, smfn); break;
1932 case PGC_SH_l2h_shadow:
1933 sh_install_xen_entries_in_l2h(v, smfn); break;
1934 #endif
1935 #if CONFIG_PAGING_LEVELS == 2 && GUEST_PAGING_LEVELS == 2
1936 case PGC_SH_l2_shadow:
1937 sh_install_xen_entries_in_l2(v, gmfn, smfn); break;
1938 #endif
1939 default: /* Do nothing */ break;
1943 shadow_promote(v, gmfn, shadow_type);
1944 set_shadow_status(v, gmfn, shadow_type, smfn);
1946 return smfn;
1949 /* Make a splintered superpage shadow */
1950 static mfn_t
1951 make_fl1_shadow(struct vcpu *v, gfn_t gfn)
1953 mfn_t smfn = shadow_alloc(v->domain, PGC_SH_fl1_shadow,
1954 (unsigned long) gfn_x(gfn));
1956 SHADOW_DEBUG(MAKE_SHADOW, "(%" SH_PRI_gfn ")=>%" SH_PRI_mfn "\n",
1957 gfn_x(gfn), mfn_x(smfn));
1959 set_fl1_shadow_status(v, gfn, smfn);
1960 return smfn;
1964 #if SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS
1965 mfn_t
1966 sh_make_monitor_table(struct vcpu *v)
1969 ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0);
1971 #if CONFIG_PAGING_LEVELS == 4
1973 struct domain *d = v->domain;
1974 mfn_t m4mfn;
1975 m4mfn = shadow_alloc(d, PGC_SH_monitor_table, 0);
1976 sh_install_xen_entries_in_l4(v, m4mfn, m4mfn);
1977 /* Remember the level of this table */
1978 mfn_to_page(m4mfn)->shadow_flags = 4;
1979 #if SHADOW_PAGING_LEVELS < 4
1980 // Install a monitor l3 table in slot 0 of the l4 table.
1981 // This is used for shadow linear maps.
1983 mfn_t m3mfn;
1984 l4_pgentry_t *l4e;
1985 m3mfn = shadow_alloc(d, PGC_SH_monitor_table, 0);
1986 mfn_to_page(m3mfn)->shadow_flags = 3;
1987 l4e = sh_map_domain_page(m4mfn);
1988 l4e[0] = l4e_from_pfn(mfn_x(m3mfn), __PAGE_HYPERVISOR);
1989 sh_unmap_domain_page(l4e);
1991 #endif /* SHADOW_PAGING_LEVELS < 4 */
1992 return m4mfn;
1995 #elif CONFIG_PAGING_LEVELS == 3
1998 struct domain *d = v->domain;
1999 mfn_t m3mfn, m2mfn;
2000 l3_pgentry_t *l3e;
2001 l2_pgentry_t *l2e;
2002 int i;
2004 m3mfn = shadow_alloc(d, PGC_SH_monitor_table, 0);
2005 /* Remember the level of this table */
2006 mfn_to_page(m3mfn)->shadow_flags = 3;
2008 // Install a monitor l2 table in slot 3 of the l3 table.
2009 // This is used for all Xen entries, including linear maps
2010 m2mfn = shadow_alloc(d, PGC_SH_monitor_table, 0);
2011 mfn_to_page(m2mfn)->shadow_flags = 2;
2012 l3e = sh_map_domain_page(m3mfn);
2013 l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT);
2014 sh_install_xen_entries_in_l2h(v, m2mfn);
2015 /* Install the monitor's own linear map */
2016 l2e = sh_map_domain_page(m2mfn);
2017 for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
2018 l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
2019 (l3e_get_flags(l3e[i]) & _PAGE_PRESENT)
2020 ? l2e_from_pfn(l3e_get_pfn(l3e[i]), __PAGE_HYPERVISOR)
2021 : l2e_empty();
2022 sh_unmap_domain_page(l2e);
2023 sh_unmap_domain_page(l3e);
2025 SHADOW_PRINTK("new monitor table: %#lx\n", mfn_x(m3mfn));
2026 return m3mfn;
2029 #elif CONFIG_PAGING_LEVELS == 2
2032 struct domain *d = v->domain;
2033 mfn_t m2mfn;
2034 m2mfn = shadow_alloc(d, PGC_SH_monitor_table, 0);
2035 sh_install_xen_entries_in_l2(v, m2mfn, m2mfn);
2036 /* Remember the level of this table */
2037 mfn_to_page(m2mfn)->shadow_flags = 2;
2038 return m2mfn;
2041 #else
2042 #error this should not happen
2043 #endif /* CONFIG_PAGING_LEVELS */
2045 #endif /* SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS */
2047 /**************************************************************************/
2048 /* These functions also take a virtual address and return the level-N
2049 * shadow table mfn and entry, but they create the shadow pagetables if
2050 * they are needed. The "demand" argument is non-zero when handling
2051 * a demand fault (so we know what to do about accessed bits &c).
2052 * If the necessary tables are not present in the guest, they return NULL. */
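/* A rough sketch of how the demand-fault path uses these (compare
 * sh_page_fault() below): the walk_t comes from guest_walk_tables(), and a
 * NULL return means the guest's own tables were not present.
 * demand_map_example() is a hypothetical helper, for illustration only: */
#if 0
static shadow_l1e_t *demand_map_example(struct vcpu *v, walk_t *gw)
{
    mfn_t sl1mfn;
    /* ft_demand_write would be used instead for a write fault. */
    return shadow_get_and_create_l1e(v, gw, &sl1mfn, ft_demand_read);
}
#endif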
2053 #if GUEST_PAGING_LEVELS >= 4
2054 static shadow_l4e_t * shadow_get_and_create_l4e(struct vcpu *v,
2055 walk_t *gw,
2056 mfn_t *sl4mfn)
2058 /* There is always a shadow of the top level table. Get it. */
2059 *sl4mfn = pagetable_get_mfn(v->arch.shadow_table);
2060 /* Reading the top level table is always valid. */
2061 return sh_linear_l4_table(v) + shadow_l4_linear_offset(gw->va);
2063 #endif /* GUEST_PAGING_LEVELS >= 4 */
2066 #if GUEST_PAGING_LEVELS >= 3
2067 static shadow_l3e_t * shadow_get_and_create_l3e(struct vcpu *v,
2068 walk_t *gw,
2069 mfn_t *sl3mfn,
2070 fetch_type_t ft)
2072 #if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
2073 mfn_t sl4mfn;
2074 shadow_l4e_t *sl4e;
2075 if ( !valid_mfn(gw->l3mfn) ) return NULL; /* No guest page. */
2076 /* Get the l4e */
2077 sl4e = shadow_get_and_create_l4e(v, gw, &sl4mfn);
2078 ASSERT(sl4e != NULL);
2079 if ( shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT )
2081 *sl3mfn = shadow_l4e_get_mfn(*sl4e);
2082 ASSERT(valid_mfn(*sl3mfn));
2084 else
2086 int r;
2087 shadow_l4e_t new_sl4e;
2088 /* No l3 shadow installed: find and install it. */
2089 *sl3mfn = get_shadow_status(v, gw->l3mfn, PGC_SH_l3_shadow);
2090 if ( !valid_mfn(*sl3mfn) )
2092 /* No l3 shadow of this page exists at all: make one. */
2093 *sl3mfn = sh_make_shadow(v, gw->l3mfn, PGC_SH_l3_shadow);
2095 /* Install the new sl3 table in the sl4e */
2096 l4e_propagate_from_guest(v, gw->l4e, gw->l4mfn,
2097 *sl3mfn, &new_sl4e, ft);
2098 r = shadow_set_l4e(v, sl4e, new_sl4e, sl4mfn);
2099 ASSERT((r & SHADOW_SET_FLUSH) == 0);
2101 /* Now follow it down a level. Guaranteed to succeed. */
2102 return sh_linear_l3_table(v) + shadow_l3_linear_offset(gw->va);
2103 #else /* PAE... */
2104 /* There is always a shadow of the top level table. Get it. */
2105 *sl3mfn = pagetable_get_mfn(v->arch.shadow_table);
2106 /* This next line is important: the shadow l3 table is in an 8k
2107 * shadow and we need to return the right mfn of the pair. This call
2108 * will set it for us as a side-effect. */
2109 (void) shadow_l3_index(sl3mfn, guest_index(gw->l3e));
2110 ASSERT(v->arch.shadow_vtable);
2111 return ((shadow_l3e_t *)v->arch.shadow_vtable)
2112 + shadow_l3_table_offset(gw->va);
2113 #endif /* GUEST_PAGING_LEVELS >= 4 */
2115 #endif /* GUEST_PAGING_LEVELS >= 3 */
2118 static shadow_l2e_t * shadow_get_and_create_l2e(struct vcpu *v,
2119 walk_t *gw,
2120 mfn_t *sl2mfn,
2121 fetch_type_t ft)
2123 #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64bit... */
2124 mfn_t sl3mfn = _mfn(INVALID_MFN);
2125 shadow_l3e_t *sl3e;
2126 if ( !valid_mfn(gw->l2mfn) ) return NULL; /* No guest page. */
2127 /* Get the l3e */
2128 sl3e = shadow_get_and_create_l3e(v, gw, &sl3mfn, ft);
2129 ASSERT(sl3e != NULL); /* Since we know guest PT is valid this far */
2130 if ( shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT )
2132 *sl2mfn = shadow_l3e_get_mfn(*sl3e);
2133 ASSERT(valid_mfn(*sl2mfn));
2135 else
2137 int r;
2138 shadow_l3e_t new_sl3e;
2139 /* No l2 shadow installed: find and install it. */
2140 *sl2mfn = get_shadow_status(v, gw->l2mfn, PGC_SH_l2_shadow);
2141 if ( !valid_mfn(*sl2mfn) )
2143 /* No l2 shadow of this page exists at all: make one. */
2144 *sl2mfn = sh_make_shadow(v, gw->l2mfn, PGC_SH_l2_shadow);
2146 /* Install the new sl2 table in the sl3e */
2147 l3e_propagate_from_guest(v, gw->l3e, gw->l3mfn,
2148 *sl2mfn, &new_sl3e, ft);
2149 r = shadow_set_l3e(v, sl3e, new_sl3e, sl3mfn);
2150 ASSERT((r & SHADOW_SET_FLUSH) == 0);
2151 #if GUEST_PAGING_LEVELS == 3
2152 /* Need to sync up the linear maps, as we are about to use them */
2153 ASSERT( r & SHADOW_SET_L3PAE_RECOPY );
2154 sh_pae_recopy(v->domain);
2155 #endif
2157 /* Now follow it down a level. Guaranteed to succeed. */
2158 return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
2159 #else /* 32bit... */
2160 /* There is always a shadow of the top level table. Get it. */
2161 *sl2mfn = pagetable_get_mfn(v->arch.shadow_table);
2162 /* This next line is important: the guest l2 has a 16k
2163 * shadow, and we need to return the right mfn of the four. This
2164 * call will set it for us as a side-effect. */
2165 (void) shadow_l2_index(sl2mfn, guest_index(gw->l2e));
2166 /* Reading the top level table is always valid. */
2167 return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
2168 #endif
2172 static shadow_l1e_t * shadow_get_and_create_l1e(struct vcpu *v,
2173 walk_t *gw,
2174 mfn_t *sl1mfn,
2175 fetch_type_t ft)
2177 mfn_t sl2mfn;
2178 shadow_l2e_t *sl2e;
2180 /* Get the l2e */
2181 sl2e = shadow_get_and_create_l2e(v, gw, &sl2mfn, ft);
2182 if ( sl2e == NULL ) return NULL;
2183 /* Install the sl1 in the l2e if it wasn't there or if we need to
2184 * re-do it to fix a PSE dirty bit. */
2185 if ( shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT
2186 && likely(ft != ft_demand_write
2187 || (guest_l2e_get_flags(*gw->l2e) & _PAGE_DIRTY)
2188 || !(guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE)) )
2190 *sl1mfn = shadow_l2e_get_mfn(*sl2e);
2191 ASSERT(valid_mfn(*sl1mfn));
2193 else
2195 shadow_l2e_t new_sl2e;
2196 int r, flags = guest_l2e_get_flags(*gw->l2e);
2197 /* No l1 shadow installed: find and install it. */
2198 if ( !(flags & _PAGE_PRESENT) )
2199 return NULL; /* No guest page. */
2200 if ( guest_supports_superpages(v) && (flags & _PAGE_PSE) )
2202 /* Splintering a superpage */
2203 gfn_t l2gfn = guest_l2e_get_gfn(*gw->l2e);
2204 *sl1mfn = get_fl1_shadow_status(v, l2gfn);
2205 if ( !valid_mfn(*sl1mfn) )
2207 /* No fl1 shadow of this superpage exists at all: make one. */
2208 *sl1mfn = make_fl1_shadow(v, l2gfn);
2211 else
2213 /* Shadowing an actual guest l1 table */
2214 if ( !valid_mfn(gw->l2mfn) ) return NULL; /* No guest page. */
2215 *sl1mfn = get_shadow_status(v, gw->l1mfn, PGC_SH_l1_shadow);
2216 if ( !valid_mfn(*sl1mfn) )
2218 /* No l1 shadow of this page exists at all: make one. */
2219 *sl1mfn = sh_make_shadow(v, gw->l1mfn, PGC_SH_l1_shadow);
2222 /* Install the new sl1 table in the sl2e */
2223 l2e_propagate_from_guest(v, gw->l2e, gw->l2mfn,
2224 *sl1mfn, &new_sl2e, ft);
2225 r = shadow_set_l2e(v, sl2e, new_sl2e, sl2mfn);
2226 ASSERT((r & SHADOW_SET_FLUSH) == 0);
2227 /* This next line is important: in 32-on-PAE and 32-on-64 modes,
2228 * the guest l1 table has an 8k shadow, and we need to return
2229 * the right mfn of the pair. This call will set it for us as a
2230 * side-effect. (In all other cases, it's a no-op and will be
2231 * compiled out.) */
2232 (void) shadow_l1_index(sl1mfn, guest_l1_table_offset(gw->va));
2234 /* Now follow it down a level. Guaranteed to succeed. */
2235 return sh_linear_l1_table(v) + shadow_l1_linear_offset(gw->va);
2240 /**************************************************************************/
2241 /* Destructors for shadow tables:
2242 * Unregister the shadow, decrement refcounts of any entries present in it,
2243 * and release the memory.
2245 * N.B. These destructors do not clear the contents of the shadows.
2246 * This allows us to delay TLB shootdowns until the page is being reused.
2247 * See shadow_alloc() and shadow_free() for how this is handled.
2248 */
2250 #if GUEST_PAGING_LEVELS >= 4
2251 void sh_destroy_l4_shadow(struct vcpu *v, mfn_t smfn)
2253 shadow_l4e_t *sl4e;
2254 u32 t = mfn_to_page(smfn)->count_info & PGC_SH_type_mask;
2255 mfn_t gmfn, sl4mfn;
2256 int xen_mappings;
2258 SHADOW_DEBUG(DESTROY_SHADOW,
2259 "%s(%05lx)\n", __func__, mfn_x(smfn));
2260 ASSERT(t == PGC_SH_l4_shadow);
2262 /* Record that the guest page isn't shadowed any more (in this type) */
2263 gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
2264 delete_shadow_status(v, gmfn, t, smfn);
2265 shadow_demote(v, gmfn, t);
2266 /* Take this shadow off the list of root shadows */
2267 list_del_init(&mfn_to_page(smfn)->list);
2269 /* Decrement refcounts of all the old entries */
2270 xen_mappings = (!shadow_mode_external(v->domain));
2271 sl4mfn = smfn;
2272 SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, 0, xen_mappings, {
2273 if ( shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT )
2275 sh_put_ref(v, shadow_l4e_get_mfn(*sl4e),
2276 (((paddr_t)mfn_x(sl4mfn)) << PAGE_SHIFT)
2277 | ((unsigned long)sl4e & ~PAGE_MASK));
2279 });
2281 /* Put the memory back in the pool */
2282 shadow_free(v->domain, smfn);
2284 #endif
2286 #if GUEST_PAGING_LEVELS >= 3
2287 void sh_destroy_l3_shadow(struct vcpu *v, mfn_t smfn)
2289 shadow_l3e_t *sl3e;
2290 u32 t = mfn_to_page(smfn)->count_info & PGC_SH_type_mask;
2291 mfn_t gmfn, sl3mfn;
2293 SHADOW_DEBUG(DESTROY_SHADOW,
2294 "%s(%05lx)\n", __func__, mfn_x(smfn));
2295 ASSERT(t == PGC_SH_l3_shadow);
2297 /* Record that the guest page isn't shadowed any more (in this type) */
2298 gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
2299 delete_shadow_status(v, gmfn, t, smfn);
2300 shadow_demote(v, gmfn, t);
2301 #if GUEST_PAGING_LEVELS == 3
2302 /* Take this shadow off the list of root shadows */
2303 list_del_init(&mfn_to_page(smfn)->list);
2304 #endif
2306 /* Decrement refcounts of all the old entries */
2307 sl3mfn = smfn;
2308 SHADOW_FOREACH_L3E(sl3mfn, sl3e, 0, 0, {
2309 if ( shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT )
2310 sh_put_ref(v, shadow_l3e_get_mfn(*sl3e),
2311 (((paddr_t)mfn_x(sl3mfn)) << PAGE_SHIFT)
2312 | ((unsigned long)sl3e & ~PAGE_MASK));
2313 });
2315 /* Put the memory back in the pool */
2316 shadow_free(v->domain, smfn);
2318 #endif
2321 #if GUEST_PAGING_LEVELS == 3
2322 static void sh_destroy_l3_subshadow(struct vcpu *v,
2323 shadow_l3e_t *sl3e)
2324 /* Tear down just a single 4-entry l3 on a 2-page l3 shadow. */
2326 int i;
2327 ASSERT((unsigned long)sl3e % (4 * sizeof (shadow_l3e_t)) == 0);
2328 for ( i = 0; i < GUEST_L3_PAGETABLE_ENTRIES; i++ )
2329 if ( shadow_l3e_get_flags(sl3e[i]) & _PAGE_PRESENT )
2330 sh_put_ref(v, shadow_l3e_get_mfn(sl3e[i]),
2331 maddr_from_mapped_domain_page(sl3e));
2333 #endif
2335 #if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
2336 void sh_unpin_all_l3_subshadows(struct vcpu *v, mfn_t smfn)
2337 /* Walk a full PAE l3 shadow, unpinning all of the subshadows on it */
2339 int i, j;
2340 struct pae_l3_bookkeeping *bk;
2342 ASSERT((mfn_to_page(smfn)->count_info & PGC_SH_type_mask)
2343 == PGC_SH_l3_pae_shadow);
2344 /* The subshadows are split, 64 on each page of the shadow */
2345 for ( i = 0; i < 2; i++ )
2347 void *p = sh_map_domain_page(_mfn(mfn_x(smfn) + i));
2348 for ( j = 0; j < 64; j++ )
2350 /* Every second 32-byte region is a bookkeeping entry */
2351 bk = (struct pae_l3_bookkeeping *)(p + (64 * j) + 32);
2352 if ( bk->pinned )
2353 sh_unpin_l3_subshadow(v, (shadow_l3e_t *)(p + (64*j)), smfn);
2354 /* Check whether we've just freed the whole shadow */
2355 if ( (mfn_to_page(smfn)->count_info & PGC_SH_count_mask) == 0 )
2357 sh_unmap_domain_page(p);
2358 return;
2361 sh_unmap_domain_page(p);
2364 #endif
2366 void sh_destroy_l2_shadow(struct vcpu *v, mfn_t smfn)
2368 shadow_l2e_t *sl2e;
2369 u32 t = mfn_to_page(smfn)->count_info & PGC_SH_type_mask;
2370 mfn_t gmfn, sl2mfn;
2371 int xen_mappings;
2373 SHADOW_DEBUG(DESTROY_SHADOW,
2374 "%s(%05lx)\n", __func__, mfn_x(smfn));
2375 ASSERT(t == PGC_SH_l2_shadow
2376 || t == PGC_SH_l2h_pae_shadow);
2378 /* Record that the guest page isn't shadowed any more (in this type) */
2379 gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
2380 delete_shadow_status(v, gmfn, t, smfn);
2381 shadow_demote(v, gmfn, t);
2382 #if GUEST_PAGING_LEVELS == 2
2383 /* Take this shadow off the list of root shadows */
2384 list_del_init(&mfn_to_page(smfn)->list);
2385 #endif
2387 /* Decrement refcounts of all the old entries */
2388 sl2mfn = smfn;
2389 xen_mappings = (!shadow_mode_external(v->domain) &&
2390 ((GUEST_PAGING_LEVELS == 2) ||
2391 ((GUEST_PAGING_LEVELS == 3) &&
2392 (t == PGC_SH_l2h_pae_shadow))));
2393 SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, xen_mappings, {
2394 if ( shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT )
2395 sh_put_ref(v, shadow_l2e_get_mfn(*sl2e),
2396 (((paddr_t)mfn_x(sl2mfn)) << PAGE_SHIFT)
2397 | ((unsigned long)sl2e & ~PAGE_MASK));
2398 });
2400 /* Put the memory back in the pool */
2401 shadow_free(v->domain, smfn);
2404 void sh_destroy_l1_shadow(struct vcpu *v, mfn_t smfn)
2406 struct domain *d = v->domain;
2407 shadow_l1e_t *sl1e;
2408 u32 t = mfn_to_page(smfn)->count_info & PGC_SH_type_mask;
2410 SHADOW_DEBUG(DESTROY_SHADOW,
2411 "%s(%05lx)\n", __func__, mfn_x(smfn));
2412 ASSERT(t == PGC_SH_l1_shadow || t == PGC_SH_fl1_shadow);
2414 /* Record that the guest page isn't shadowed any more (in this type) */
2415 if ( t == PGC_SH_fl1_shadow )
2417 gfn_t gfn = _gfn(mfn_to_page(smfn)->u.inuse.type_info);
2418 delete_fl1_shadow_status(v, gfn, smfn);
2420 else
2422 mfn_t gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
2423 delete_shadow_status(v, gmfn, t, smfn);
2424 shadow_demote(v, gmfn, t);
2427 if ( shadow_mode_refcounts(d) )
2429 /* Decrement refcounts of all the old entries */
2430 mfn_t sl1mfn = smfn;
2431 SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
2432 if ( shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT )
2433 shadow_put_page_from_l1e(*sl1e, d);
2434 });
2437 /* Put the memory back in the pool */
2438 shadow_free(v->domain, smfn);
2441 #if SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS
2442 void sh_destroy_monitor_table(struct vcpu *v, mfn_t mmfn)
2444 struct domain *d = v->domain;
2445 ASSERT((mfn_to_page(mmfn)->count_info & PGC_SH_type_mask)
2446 == PGC_SH_monitor_table);
2448 #if (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS != 4)
2449 /* Need to destroy the l3 monitor page in slot 0 too */
2451 l4_pgentry_t *l4e = sh_map_domain_page(mmfn);
2452 ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT);
2453 shadow_free(d, _mfn(l4e_get_pfn(l4e[0])));
2454 sh_unmap_domain_page(l4e);
2456 #elif CONFIG_PAGING_LEVELS == 3
2457 /* Need to destroy the l2 monitor page in slot 4 too */
2459 l3_pgentry_t *l3e = sh_map_domain_page(mmfn);
2460 ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
2461 shadow_free(d, _mfn(l3e_get_pfn(l3e[3])));
2462 sh_unmap_domain_page(l3e);
2464 #endif
2466 /* Put the memory back in the pool */
2467 shadow_free(d, mmfn);
2469 #endif
2471 /**************************************************************************/
2472 /* Functions to destroy non-Xen mappings in a pagetable hierarchy.
2473 * These are called from common code when we are running out of shadow
2474 * memory, and unpinning all the top-level shadows hasn't worked.
2476 * This implementation is pretty crude and slow, but we hope that it won't
2477 * be called very often. */
2479 #if GUEST_PAGING_LEVELS == 2
2481 void sh_unhook_32b_mappings(struct vcpu *v, mfn_t sl2mfn)
2483 shadow_l2e_t *sl2e;
2484 int xen_mappings = !shadow_mode_external(v->domain);
2485 SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, xen_mappings, {
2486 (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
2487 });
2490 #elif GUEST_PAGING_LEVELS == 3
2492 void sh_unhook_pae_mappings(struct vcpu *v, mfn_t sl3mfn)
2493 /* Walk a full PAE l3 shadow, unhooking entries from all the subshadows */
2495 shadow_l3e_t *sl3e;
2496 SHADOW_FOREACH_L3E(sl3mfn, sl3e, 0, 0, {
2497 if ( (shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) ) {
2498 mfn_t sl2mfn = shadow_l3e_get_mfn(*sl3e);
2499 if ( (mfn_to_page(sl2mfn)->count_info & PGC_SH_type_mask)
2500 == PGC_SH_l2h_pae_shadow )
2502 /* High l2: need to pick particular l2es to unhook */
2503 shadow_l2e_t *sl2e;
2504 SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, 1, {
2505 (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
2506 });
2508 else
2510 /* Normal l2: can safely unhook the whole l3e */
2511 (void) shadow_set_l3e(v, sl3e, shadow_l3e_empty(), sl3mfn);
2514 });
2515 /* We've changed PAE L3 entries: must sync up various copies of them */
2516 sh_pae_recopy(v->domain);
2519 #elif GUEST_PAGING_LEVELS == 4
2521 void sh_unhook_64b_mappings(struct vcpu *v, mfn_t sl4mfn)
2523 shadow_l4e_t *sl4e;
2524 int xen_mappings = !shadow_mode_external(v->domain);
2525 SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, 0, xen_mappings, {
2526 (void) shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn);
2527 });
2530 #endif
2532 /**************************************************************************/
2533 /* Internal translation functions.
2534 * These functions require a pointer to the shadow entry that will be updated.
2535 */
2537 /* These functions take a new guest entry, translate it to shadow and write
2538 * the shadow entry.
2540 * They return the same bitmaps as the shadow_set_lXe() functions.
2541 */
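/* The bitmap bits referenced in this file are SHADOW_SET_ERROR,
 * SHADOW_SET_FLUSH, SHADOW_SET_CHANGED and (when writing PAE l3 entries)
 * SHADOW_SET_L3PAE_RECOPY; callers OR the results together when validating
 * more than one entry. */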
2543 #if GUEST_PAGING_LEVELS >= 4
2544 static int validate_gl4e(struct vcpu *v, void *new_ge, mfn_t sl4mfn, void *se)
2546 shadow_l4e_t new_sl4e;
2547 guest_l4e_t *new_gl4e = new_ge;
2548 shadow_l4e_t *sl4p = se;
2549 mfn_t sl3mfn = _mfn(INVALID_MFN);
2550 int result = 0;
2552 perfc_incrc(shadow_validate_gl4e_calls);
2554 if ( guest_l4e_get_flags(*new_gl4e) & _PAGE_PRESENT )
2556 gfn_t gl3gfn = guest_l4e_get_gfn(*new_gl4e);
2557 mfn_t gl3mfn = vcpu_gfn_to_mfn(v, gl3gfn);
2558 if ( valid_mfn(gl3mfn) )
2559 sl3mfn = get_shadow_status(v, gl3mfn, PGC_SH_l3_shadow);
2560 else
2561 result |= SHADOW_SET_ERROR;
2563 l4e_propagate_from_guest(v, new_gl4e, _mfn(INVALID_MFN),
2564 sl3mfn, &new_sl4e, ft_prefetch);
2566 // check for updates to xen reserved slots
2567 if ( !shadow_mode_external(v->domain) )
2569 int shadow_index = (((unsigned long)sl4p & ~PAGE_MASK) /
2570 sizeof(shadow_l4e_t));
2571 int reserved_xen_slot = !is_guest_l4_slot(shadow_index);
2573 if ( unlikely(reserved_xen_slot) )
2575 // attempt by the guest to write to a xen reserved slot
2576 //
2577 SHADOW_PRINTK("%s out-of-range update "
2578 "sl4mfn=%05lx index=0x%x val=%" SH_PRI_pte "\n",
2579 __func__, mfn_x(sl4mfn), shadow_index, new_sl4e.l4);
2580 if ( shadow_l4e_get_flags(new_sl4e) & _PAGE_PRESENT )
2582 SHADOW_ERROR("out-of-range l4e update\n");
2583 result |= SHADOW_SET_ERROR;
2586 // do not call shadow_set_l4e...
2587 return result;
2591 result |= shadow_set_l4e(v, sl4p, new_sl4e, sl4mfn);
2592 return result;
2594 #endif // GUEST_PAGING_LEVELS >= 4
2596 #if GUEST_PAGING_LEVELS >= 3
2597 static int validate_gl3e(struct vcpu *v, void *new_ge, mfn_t sl3mfn, void *se)
2599 shadow_l3e_t new_sl3e;
2600 guest_l3e_t *new_gl3e = new_ge;
2601 shadow_l3e_t *sl3p = se;
2602 mfn_t sl2mfn = _mfn(INVALID_MFN);
2603 int result = 0;
2605 perfc_incrc(shadow_validate_gl3e_calls);
2607 #if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
2609 /* If we've updated a subshadow which is unreferenced then
2610 we don't care what value is being written - bail. */
2611 struct pae_l3_bookkeeping *info = sl3p_to_info(se);
2612 if(!info->refcount)
2613 return result;
2615 #endif
2617 if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
2619 gfn_t gl2gfn = guest_l3e_get_gfn(*new_gl3e);
2620 mfn_t gl2mfn = vcpu_gfn_to_mfn(v, gl2gfn);
2621 if ( valid_mfn(gl2mfn) )
2622 sl2mfn = get_shadow_status(v, gl2mfn, PGC_SH_l2_shadow);
2623 else
2624 result |= SHADOW_SET_ERROR;
2626 l3e_propagate_from_guest(v, new_gl3e, _mfn(INVALID_MFN),
2627 sl2mfn, &new_sl3e, ft_prefetch);
2628 result |= shadow_set_l3e(v, sl3p, new_sl3e, sl3mfn);
2630 #if GUEST_PAGING_LEVELS == 3
2631 /* We have changed a PAE l3 entry: need to sync up the possible copies
2632 * of it */
2633 if ( result & SHADOW_SET_L3PAE_RECOPY )
2634 sh_pae_recopy(v->domain);
2635 #endif
2637 return result;
2639 #endif // GUEST_PAGING_LEVELS >= 3
2641 static int validate_gl2e(struct vcpu *v, void *new_ge, mfn_t sl2mfn, void *se)
2643 shadow_l2e_t new_sl2e;
2644 guest_l2e_t *new_gl2e = new_ge;
2645 shadow_l2e_t *sl2p = se;
2646 mfn_t sl1mfn = _mfn(INVALID_MFN);
2647 int result = 0;
2649 perfc_incrc(shadow_validate_gl2e_calls);
2651 if ( guest_l2e_get_flags(*new_gl2e) & _PAGE_PRESENT )
2653 gfn_t gl1gfn = guest_l2e_get_gfn(*new_gl2e);
2654 if ( guest_supports_superpages(v) &&
2655 (guest_l2e_get_flags(*new_gl2e) & _PAGE_PSE) )
2657 // superpage -- need to look up the shadow L1 which holds the
2658 // splitters...
2659 sl1mfn = get_fl1_shadow_status(v, gl1gfn);
2660 #if 0
2661 // XXX - it's possible that we want to do some kind of prefetch
2662 // for superpage fl1's here, but this is *not* on the demand path,
2663 // so we'll hold off trying that for now...
2664 //
2665 if ( !valid_mfn(sl1mfn) )
2666 sl1mfn = make_fl1_shadow(v, gl1gfn);
2667 #endif
2669 else
2671 mfn_t gl1mfn = vcpu_gfn_to_mfn(v, gl1gfn);
2672 if ( valid_mfn(gl1mfn) )
2673 sl1mfn = get_shadow_status(v, gl1mfn, PGC_SH_l1_shadow);
2674 else
2675 result |= SHADOW_SET_ERROR;
2678 l2e_propagate_from_guest(v, new_gl2e, _mfn(INVALID_MFN),
2679 sl1mfn, &new_sl2e, ft_prefetch);
2681 // check for updates to xen reserved slots in PV guests...
2682 // XXX -- need to revisit this for PV 3-on-4 guests.
2683 //
2684 #if SHADOW_PAGING_LEVELS < 4
2685 #if CONFIG_PAGING_LEVELS == SHADOW_PAGING_LEVELS
2686 if ( !shadow_mode_external(v->domain) )
2688 int shadow_index = (((unsigned long)sl2p & ~PAGE_MASK) /
2689 sizeof(shadow_l2e_t));
2690 int reserved_xen_slot;
2692 #if SHADOW_PAGING_LEVELS == 3
2693 reserved_xen_slot =
2694 (((mfn_to_page(sl2mfn)->count_info & PGC_SH_type_mask)
2695 == PGC_SH_l2h_pae_shadow) &&
2696 (shadow_index
2697 >= (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1))));
2698 #else /* SHADOW_PAGING_LEVELS == 2 */
2699 reserved_xen_slot = (shadow_index >= L2_PAGETABLE_FIRST_XEN_SLOT);
2700 #endif
2702 if ( unlikely(reserved_xen_slot) )
2704 // attempt by the guest to write to a xen reserved slot
2705 //
2706 SHADOW_PRINTK("%s out-of-range update "
2707 "sl2mfn=%05lx index=0x%x val=%" SH_PRI_pte "\n",
2708 __func__, mfn_x(sl2mfn), shadow_index, new_sl2e.l2);
2709 if ( shadow_l2e_get_flags(new_sl2e) & _PAGE_PRESENT )
2711 SHADOW_ERROR("out-of-range l2e update\n");
2712 result |= SHADOW_SET_ERROR;
2715 // do not call shadow_set_l2e...
2716 return result;
2719 #endif /* CONFIG_PAGING_LEVELS == SHADOW_PAGING_LEVELS */
2720 #endif /* SHADOW_PAGING_LEVELS < 4 */
2722 result |= shadow_set_l2e(v, sl2p, new_sl2e, sl2mfn);
2724 return result;
2727 static int validate_gl1e(struct vcpu *v, void *new_ge, mfn_t sl1mfn, void *se)
2729 shadow_l1e_t new_sl1e;
2730 guest_l1e_t *new_gl1e = new_ge;
2731 shadow_l1e_t *sl1p = se;
2732 gfn_t gfn;
2733 mfn_t mfn;
2734 int result = 0;
2736 perfc_incrc(shadow_validate_gl1e_calls);
2738 gfn = guest_l1e_get_gfn(*new_gl1e);
2739 mfn = vcpu_gfn_to_mfn(v, gfn);
2741 l1e_propagate_from_guest(v, *new_gl1e, &new_sl1e,
2742 /* mmio? */ !valid_mfn(mfn));
2744 result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
2745 return result;
2749 /**************************************************************************/
2750 /* Functions which translate and install the shadows of arbitrary guest
2751 * entries that we have just seen the guest write. */
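/* sh_map_and_validate() does the generic work: it maps the relevant shadow
 * page(s) and calls a per-level validate_glNe() callback on every guest
 * entry covered by the write.  The sh_map_and_validate_glNe() entry points
 * below merely select the shadow type, index function and callback. */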
2754 static inline int
2755 sh_map_and_validate(struct vcpu *v, mfn_t gmfn,
2756 void *new_gp, u32 size, u32 sh_type,
2757 u32 (*shadow_index)(mfn_t *smfn, u32 idx),
2758 int (*validate_ge)(struct vcpu *v, void *ge,
2759 mfn_t smfn, void *se))
2760 /* Generic function for mapping and validating. */
2762 mfn_t smfn, smfn2, map_mfn;
2763 shadow_l1e_t *sl1p;
2764 u32 shadow_idx, guest_idx;
2765 int result = 0;
2767 /* Align address and size to guest entry boundaries */
2768 size += (unsigned long)new_gp & (sizeof (guest_l1e_t) - 1);
2769 new_gp = (void *)((unsigned long)new_gp & ~(sizeof (guest_l1e_t) - 1));
2770 size = (size + sizeof (guest_l1e_t) - 1) & ~(sizeof (guest_l1e_t) - 1);
2771 ASSERT(size + (((unsigned long)new_gp) & ~PAGE_MASK) <= PAGE_SIZE);
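/* For example, a 4-byte guest write at page offset 0x1c with 8-byte guest
 * entries is widened to the containing entry: new_gp rounds down to offset
 * 0x18 and size rounds up to 8, so exactly one entry is revalidated. */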
2773 /* Map the shadow page */
2774 smfn = get_shadow_status(v, gmfn, sh_type);
2775 ASSERT(valid_mfn(smfn)); /* Otherwise we would not have been called */
2776 guest_idx = guest_index(new_gp);
2777 map_mfn = smfn;
2778 shadow_idx = shadow_index(&map_mfn, guest_idx);
2779 sl1p = map_shadow_page(map_mfn);
2781 /* Validate one entry at a time */
2782 while ( size )
2784 smfn2 = smfn;
2785 guest_idx = guest_index(new_gp);
2786 shadow_idx = shadow_index(&smfn2, guest_idx);
2787 if ( mfn_x(smfn2) != mfn_x(map_mfn) )
2789 /* We have moved to another page of the shadow */
2790 map_mfn = smfn2;
2791 unmap_shadow_page(sl1p);
2792 sl1p = map_shadow_page(map_mfn);
2794 result |= validate_ge(v,
2795 new_gp,
2796 map_mfn,
2797 &sl1p[shadow_idx]);
2798 size -= sizeof(guest_l1e_t);
2799 new_gp += sizeof(guest_l1e_t);
2801 unmap_shadow_page(sl1p);
2802 return result;
2806 int
2807 sh_map_and_validate_gl4e(struct vcpu *v, mfn_t gl4mfn,
2808 void *new_gl4p, u32 size)
2810 #if GUEST_PAGING_LEVELS >= 4
2811 return sh_map_and_validate(v, gl4mfn, new_gl4p, size,
2812 PGC_SH_l4_shadow,
2813 shadow_l4_index,
2814 validate_gl4e);
2815 #else // ! GUEST_PAGING_LEVELS >= 4
2816 SHADOW_PRINTK("called in wrong paging mode!\n");
2817 BUG();
2818 return 0;
2819 #endif
2822 int
2823 sh_map_and_validate_gl3e(struct vcpu *v, mfn_t gl3mfn,
2824 void *new_gl3p, u32 size)
2826 #if GUEST_PAGING_LEVELS >= 3
2827 return sh_map_and_validate(v, gl3mfn, new_gl3p, size,
2828 PGC_SH_l3_shadow,
2829 shadow_l3_index,
2830 validate_gl3e);
2831 #else // ! GUEST_PAGING_LEVELS >= 3
2832 SHADOW_PRINTK("called in wrong paging mode!\n");
2833 BUG();
2834 return 0;
2835 #endif
2838 int
2839 sh_map_and_validate_gl2e(struct vcpu *v, mfn_t gl2mfn,
2840 void *new_gl2p, u32 size)
2842 return sh_map_and_validate(v, gl2mfn, new_gl2p, size,
2843 PGC_SH_l2_shadow,
2844 shadow_l2_index,
2845 validate_gl2e);
2848 int
2849 sh_map_and_validate_gl2he(struct vcpu *v, mfn_t gl2mfn,
2850 void *new_gl2p, u32 size)
2852 #if GUEST_PAGING_LEVELS == 3
2853 return sh_map_and_validate(v, gl2mfn, new_gl2p, size,
2854 PGC_SH_l2h_shadow,
2855 shadow_l2_index,
2856 validate_gl2e);
2857 #else /* Non-PAE guests don't have different kinds of l2 table */
2858 SHADOW_PRINTK("called in wrong paging mode!\n");
2859 BUG();
2860 return 0;
2861 #endif
2864 int
2865 sh_map_and_validate_gl1e(struct vcpu *v, mfn_t gl1mfn,
2866 void *new_gl1p, u32 size)
2868 return sh_map_and_validate(v, gl1mfn, new_gl1p, size,
2869 PGC_SH_l1_shadow,
2870 shadow_l1_index,
2871 validate_gl1e);
2875 /**************************************************************************/
2876 /* Optimization: If we see two emulated writes of zeros to the same
2877 * page-table without another kind of page fault in between, we guess
2878 * that this is a batch of changes (for process destruction) and
2879 * unshadow the page so we don't take a pagefault on every entry. This
2880 * should also make finding writeable mappings of pagetables much
2881 * easier. */
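/* The hint is a single mfn per vcpu, v->arch.shadow.last_emulated_mfn:
 * check_for_early_unshadow() acts when consecutive emulated writes hit the
 * same shadowed mfn, and reset_early_unshadow() clears the hint whenever a
 * genuine page fault is seen. */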
2883 /* Look to see if this is the second emulated write in a row to this
2884 * page, and unshadow/unhook if it is */
2885 static inline void check_for_early_unshadow(struct vcpu *v, mfn_t gmfn)
2887 #if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
2888 if ( v->arch.shadow.last_emulated_mfn == mfn_x(gmfn) &&
2889 sh_mfn_is_a_page_table(gmfn) )
2891 u32 flags = mfn_to_page(gmfn)->shadow_flags;
2892 mfn_t smfn;
2893 if ( !(flags & (SHF_L2_32|SHF_L3_PAE|SHF_L4_64)) )
2895 perfc_incrc(shadow_early_unshadow);
2896 sh_remove_shadows(v, gmfn, 0 /* Can fail to unshadow */ );
2897 return;
2899 /* SHF_unhooked_mappings is set to make sure we only unhook
2900 * once in a single batch of updates. It is reset when this
2901 * top-level page is loaded into CR3 again */
2902 if ( !(flags & SHF_unhooked_mappings) )
2904 perfc_incrc(shadow_early_unshadow_top);
2905 mfn_to_page(gmfn)->shadow_flags |= SHF_unhooked_mappings;
2906 if ( flags & SHF_L2_32 )
2908 smfn = get_shadow_status(v, gmfn, PGC_SH_l2_32_shadow);
2909 shadow_unhook_mappings(v, smfn);
2911 if ( flags & SHF_L3_PAE )
2913 smfn = get_shadow_status(v, gmfn, PGC_SH_l3_pae_shadow);
2914 shadow_unhook_mappings(v, smfn);
2916 if ( flags & SHF_L4_64 )
2918 smfn = get_shadow_status(v, gmfn, PGC_SH_l4_64_shadow);
2919 shadow_unhook_mappings(v, smfn);
2923 v->arch.shadow.last_emulated_mfn = mfn_x(gmfn);
2924 #endif
2927 /* Stop counting towards early unshadows, as we've seen a real page fault */
2928 static inline void reset_early_unshadow(struct vcpu *v)
2930 #if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
2931 v->arch.shadow.last_emulated_mfn = INVALID_MFN;
2932 #endif
2937 /**************************************************************************/
2938 /* Entry points into the shadow code */
2940 /* Called from the pagefault handler in Xen, and from the HVM trap handlers
2941 * for pagefaults. Returns 1 if this fault was an artefact of the
2942 * shadow code (and the guest should retry) or 0 if it is not (and the
2943 * fault should be handled elsewhere or passed to the guest). */
2945 static int sh_page_fault(struct vcpu *v,
2946 unsigned long va,
2947 struct cpu_user_regs *regs)
2949 struct domain *d = v->domain;
2950 walk_t gw;
2951 u32 accumulated_gflags;
2952 gfn_t gfn;
2953 mfn_t gmfn, sl1mfn=_mfn(0);
2954 shadow_l1e_t sl1e, *ptr_sl1e;
2955 paddr_t gpa;
2956 struct cpu_user_regs emul_regs;
2957 struct x86_emulate_ctxt emul_ctxt;
2958 int r, mmio;
2959 fetch_type_t ft = 0;
2961 //
2962 // XXX: Need to think about eventually mapping superpages directly in the
2963 // shadow (when possible), as opposed to splintering them into a
2964 // bunch of 4K maps.
2965 //
2967 shadow_lock(d);
2969 SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n",
2970 v->domain->domain_id, v->vcpu_id, va, regs->error_code);
2972 shadow_audit_tables(v);
2974 if ( guest_walk_tables(v, va, &gw, 1) != 0 )
2976 SHADOW_PRINTK("malformed guest pagetable!");
2977 print_gw(&gw);
2980 sh_audit_gw(v, &gw);
2982 // We do not look at the gw->l1e, as that will not exist for superpages.
2983 // Instead, we use the gw->eff_l1e...
2984 //
2985 // We need not check all the levels of the guest page table entries for
2986 // present vs not-present, as the eff_l1e will always be not present if
2987 // one of the higher level entries is not present.
2988 //
2989 if ( unlikely(!(guest_l1e_get_flags(gw.eff_l1e) & _PAGE_PRESENT)) )
2991 if ( hvm_guest(v) && !shadow_vcpu_mode_translate(v) )
2993 /* Not present in p2m map, means this is mmio */
2994 gpa = va;
2995 goto mmio;
2998 perfc_incrc(shadow_fault_bail_not_present);
2999 goto not_a_shadow_fault;
3002 // All levels of the guest page table are now known to be present.
3003 accumulated_gflags = accumulate_guest_flags(v, &gw);
3005 // Check for attempts to access supervisor-only pages from user mode,
3006 // i.e. ring 3. Such errors are not caused or dealt with by the shadow
3007 // code.
3008 //
3009 if ( (regs->error_code & PFEC_user_mode) &&
3010 !(accumulated_gflags & _PAGE_USER) )
3012 /* illegal user-mode access to supervisor-only page */
3013 perfc_incrc(shadow_fault_bail_user_supervisor);
3014 goto not_a_shadow_fault;
3017 // Was it a write fault?
3018 //
3019 if ( regs->error_code & PFEC_write_access )
3021 if ( unlikely(!(accumulated_gflags & _PAGE_RW)) )
3023 perfc_incrc(shadow_fault_bail_ro_mapping);
3024 goto not_a_shadow_fault;
3027 else // must have been either an insn fetch or read fault
3029 // Check for NX bit violations: attempts to execute code that is
3030 // marked "do not execute". Such errors are not caused or dealt with
3031 // by the shadow code.
3032 //
3033 if ( regs->error_code & PFEC_insn_fetch )
3035 if ( accumulated_gflags & _PAGE_NX_BIT )
3037 /* NX prevented this code fetch */
3038 perfc_incrc(shadow_fault_bail_nx);
3039 goto not_a_shadow_fault;
3044 /* Is this an MMIO access? */
3045 gfn = guest_l1e_get_gfn(gw.eff_l1e);
3046 mmio = ( hvm_guest(v)
3047 && shadow_vcpu_mode_translate(v)
3048 && mmio_space(gfn_to_paddr(gfn)) );
3050 /* For MMIO, the shadow holds the *gfn*; for normal accesses, it holds
3051 * the equivalent mfn. */
3052 if ( mmio )
3053 gmfn = _mfn(gfn_x(gfn));
3054 else
3056 gmfn = vcpu_gfn_to_mfn(v, gfn);
3057 if ( !valid_mfn(gmfn) )
3059 perfc_incrc(shadow_fault_bail_bad_gfn);
3060 SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"SH_PRI_mfn"\n",
3061 gfn_x(gfn), mfn_x(gmfn));
3062 goto not_a_shadow_fault;
3066 /* Make sure there is enough free shadow memory to build a chain of
3067 * shadow tables: one SHADOW_MAX_ORDER chunk will always be enough
3068 * to allocate all we need. (We never allocate a top-level shadow
3069 * on this path, only a 32b l1, pae l2+1 or 64b l3+2+1) */
3070 shadow_prealloc(d, SHADOW_MAX_ORDER);
3072 /* Acquire the shadow. This must happen before we figure out the rights
3073 * for the shadow entry, since we might promote a page here. */
3074 // XXX -- this code will need to change somewhat if/when the shadow code
3075 // can directly map superpages...
3076 ft = ((regs->error_code & PFEC_write_access) ?
3077 ft_demand_write : ft_demand_read);
3078 ptr_sl1e = shadow_get_and_create_l1e(v, &gw, &sl1mfn, ft);
3079 ASSERT(ptr_sl1e);
3081 /* Calculate the shadow entry */
3082 if ( ft == ft_demand_write )
3084 if ( l1e_write_fault(v, &gw, gmfn, &sl1e, mmio) )
3086 perfc_incrc(shadow_fault_emulate_write);
3087 goto emulate;
3090 else if ( l1e_read_fault(v, &gw, gmfn, &sl1e, mmio) )
3092 perfc_incrc(shadow_fault_emulate_read);
3093 goto emulate;
3096 /* Quick sanity check: we never make an MMIO entry that's got the
3097 * _PAGE_PRESENT flag set in it. */
3098 ASSERT(!mmio || !(shadow_l1e_get_flags(sl1e) & _PAGE_PRESENT));
3100 r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
3102 if ( mmio )
3104 gpa = guest_walk_to_gpa(&gw);
3105 goto mmio;
3108 #if 0
3109 if ( !(r & SHADOW_SET_CHANGED) )
3110 debugtrace_printk("%s: shadow_set_l1e(va=%p, sl1e=%" SH_PRI_pte
3111 ") did not change anything\n",
3112 __func__, gw.va, l1e_get_intpte(sl1e));
3113 #endif
3115 perfc_incrc(shadow_fault_fixed);
3116 d->arch.shadow.fault_count++;
3117 reset_early_unshadow(v);
3119 done:
3120 sh_audit_gw(v, &gw);
3121 unmap_walk(v, &gw);
3122 SHADOW_PRINTK("fixed\n");
3123 shadow_audit_tables(v);
3124 shadow_unlock(d);
3125 return EXCRET_fault_fixed;
3127 emulate:
3129 /* Take the register set we were called with */
3130 emul_regs = *regs;
3131 if ( hvm_guest(v) )
3133 /* Add the guest's segment selectors, rip, rsp, rflags */
3134 hvm_store_cpu_guest_regs(v, &emul_regs, NULL);
3136 emul_ctxt.regs = &emul_regs;
3137 emul_ctxt.cr2 = va;
3138 emul_ctxt.mode = hvm_guest(v) ? hvm_guest_x86_mode(v) : X86EMUL_MODE_HOST;
3140 SHADOW_PRINTK("emulate: eip=%#lx\n", emul_regs.eip);
3142 v->arch.shadow.propagate_fault = 0;
3144 /*
3145 * We do not emulate user writes. Instead we use them as a hint that the
3146 * page is no longer a page table. This behaviour differs from native, but
3147 * it seems very unlikely that any OS grants user access to page tables.
3148 */
3149 if ( (regs->error_code & PFEC_user_mode) ||
3150 x86_emulate_memop(&emul_ctxt, &shadow_emulator_ops) )
3152 SHADOW_PRINTK("emulator failure, unshadowing mfn %#lx\n",
3153 mfn_x(gmfn));
3154 perfc_incrc(shadow_fault_emulate_failed);
3155 /* If this is actually a page table, then we have a bug, and need
3156 * to support more operations in the emulator. More likely,
3157 * though, this is a hint that this page should not be shadowed. */
3158 shadow_remove_all_shadows(v, gmfn);
3159 /* This means that actual missing operations will cause the
3160 * guest to loop on the same page fault. */
3161 goto done;
3164 /* Emulation triggered another page fault? */
3165 if ( v->arch.shadow.propagate_fault )
3166 goto not_a_shadow_fault;
3168 /* Emulator has changed the user registers: write back */
3169 if ( hvm_guest(v) )
3171 /* Write back the guest's segment selectors, rip, rsp, rflags */
3172 hvm_load_cpu_guest_regs(v, &emul_regs);
3173 /* And don't overwrite those in the caller's regs. */
3174 emul_regs.eip = regs->eip;
3175 emul_regs.cs = regs->cs;
3176 emul_regs.eflags = regs->eflags;
3177 emul_regs.esp = regs->esp;
3178 emul_regs.ss = regs->ss;
3179 emul_regs.es = regs->es;
3180 emul_regs.ds = regs->ds;
3181 emul_regs.fs = regs->fs;
3182 emul_regs.gs = regs->gs;
3184 *regs = emul_regs;
3186 goto done;
3188 mmio:
3189 perfc_incrc(shadow_fault_mmio);
3190 if ( !hvm_apic_support(d) && (gpa >= 0xFEC00000) )
3192 /* Need to deal with these disabled-APIC accesses, as
3193 * handle_mmio() apparently does not currently do that. */
3194 /* TJD: What about it, then? For now, I'm turning this BUG()
3195 * into a domain_crash() since we don't want to kill Xen. */
3196 SHADOW_ERROR("disabled-APIC access: not supported.\n");
3197 domain_crash(d);
3199 sh_audit_gw(v, &gw);
3200 unmap_walk(v, &gw);
3201 SHADOW_PRINTK("mmio\n");
3202 shadow_audit_tables(v);
3203 reset_early_unshadow(v);
3204 shadow_unlock(d);
3205 sh_log_mmio(v, gpa);
3206 handle_mmio(va, gpa);
3207 return EXCRET_fault_fixed;
3209 not_a_shadow_fault:
3210 sh_audit_gw(v, &gw);
3211 unmap_walk(v, &gw);
3212 SHADOW_PRINTK("not a shadow fault\n");
3213 shadow_audit_tables(v);
3214 reset_early_unshadow(v);
3215 shadow_unlock(d);
3216 return 0;
3220 static int
3221 sh_invlpg(struct vcpu *v, unsigned long va)
3222 /* Called when the guest requests an invlpg. Returns 1 if the invlpg
3223 * instruction should be issued on the hardware, or 0 if it's safe not
3224 * to do so. */
3226 shadow_l2e_t *ptr_sl2e = shadow_get_l2e(v, va);
3228 // XXX -- might be a good thing to prefetch the va into the shadow
3230 // no need to flush anything if there's no SL2...
3231 //
3232 if ( !ptr_sl2e )
3233 return 0;
3235 // If there's nothing shadowed for this particular sl2e, then
3236 // there is no need to do an invlpg, either...
3237 //
3238 if ( !(shadow_l2e_get_flags(*ptr_sl2e) & _PAGE_PRESENT) )
3239 return 0;
3241 // Check to see if the SL2 is a splintered superpage...
3242 // If so, then we'll need to flush the entire TLB (because that's
3243 // easier than invalidating all of the individual 4K pages).
3244 //
3245 if ( (mfn_to_page(shadow_l2e_get_mfn(*ptr_sl2e))->count_info &
3246 PGC_SH_type_mask) == PGC_SH_fl1_shadow )
3248 local_flush_tlb();
3249 return 0;
3252 return 1;
3255 static unsigned long
3256 sh_gva_to_gfn(struct vcpu *v, unsigned long va)
3257 /* Called to translate a guest virtual address to what the *guest*
3258 * pagetables would map it to. */
3260 walk_t gw;
3261 gfn_t gfn;
3263 guest_walk_tables(v, va, &gw, 0);
3264 gfn = guest_walk_to_gfn(&gw);
3265 unmap_walk(v, &gw);
3267 return gfn_x(gfn);
3271 static unsigned long
3272 sh_gva_to_gpa(struct vcpu *v, unsigned long va)
3273 /* Called to translate a guest virtual address to the guest physical
3274 * address that the guest's own pagetables would map it to. */
3276 unsigned long gfn = sh_gva_to_gfn(v, va);
3277 if ( gfn == INVALID_GFN )
3278 return 0;
3279 else
3280 return (gfn << PAGE_SHIFT) | (va & ~PAGE_MASK);
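/* For example, with 4k pages, if the walk yields gfn 0x1f0 for a va whose
 * low 12 bits are 0x234, this returns gpa 0x1f0234; a failed walk
 * (INVALID_GFN) is reported as gpa 0. */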
3284 // XXX -- should this be in this file?
3285 // Or should it be moved to shadow-common.c?
3286 //
3287 /* Returns a lowmem machine address of the copied HVM L3 root table.
3288 * If clear_res != 0, then clear the PAE-l3 reserved bits in the copy,
3289 * otherwise blank out any entries with reserved bits in them. */
3290 #if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
3291 static unsigned long
3292 hvm_pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab, int clear_res)
3294 int i, f;
3295 int res = (_PAGE_RW|_PAGE_NX_BIT|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY);
3296 l3_pgentry_t new_l3e, *copy = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
3297 memcpy(copy, l3tab, 4 * sizeof(l3_pgentry_t));
3298 for ( i = 0; i < 4; i++ )
3300 f = l3e_get_flags(l3tab[i]);
3301 if ( (f & _PAGE_PRESENT) && (!(f & res) || clear_res) )
3302 new_l3e = l3e_from_pfn(l3e_get_pfn(l3tab[i]), f & ~res);
3303 else
3304 new_l3e = l3e_empty();
3305 safe_write_entry(&copy[i], &new_l3e);
3307 return __pa(copy);
3309 #endif
3312 static inline void
3313 sh_update_linear_entries(struct vcpu *v)
3314 /* Sync up all the linear mappings for this vcpu's pagetables */
3316 struct domain *d = v->domain;
3318 /* Linear pagetables in PV guests
3319 * ------------------------------
3321 * Guest linear pagetables, which map the guest pages, are at
3322 * LINEAR_PT_VIRT_START. Shadow linear pagetables, which map the
3323 * shadows, are at SH_LINEAR_PT_VIRT_START. Most of the time these
3324 * are set up at shadow creation time, but (of course!) the PAE case
3325 * is subtler. Normal linear mappings are made by having an entry
3326 * in the top-level table that points to itself (shadow linear) or
3327 * to the guest top-level table (guest linear). For PAE, to set up
3328 * a linear map requires us to copy the four top-level entries into
3329 * level-2 entries. That means that every time we change a PAE l3e,
3330 * we need to reflect the change into the copy.
3332 * Linear pagetables in HVM guests
3333 * -------------------------------
3335 * For HVM guests, the linear pagetables are installed in the monitor
3336 * tables (since we can't put them in the shadow). Shadow linear
3337 * pagetables, which map the shadows, are at SH_LINEAR_PT_VIRT_START,
3338 * and we use the linear pagetable slot at LINEAR_PT_VIRT_START for
3339 * a linear pagetable of the monitor tables themselves. We have
3340 * the same issue of having to re-copy PAE l3 entries whenever we use
3341 * PAE shadows.
3343 * Because HVM guests run on the same monitor tables regardless of the
3344 * shadow tables in use, the linear mapping of the shadow tables has to
3345 * be updated every time v->arch.shadow_table changes.
3346 */
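/* The compile-time cases below do this as follows: 4-on-4 and 2-on-2 need
 * at most to refresh the single SH_LINEAR_PT_VIRT_START slot (and only for
 * external/HVM guests), while the PAE cases recopy four l3-level entries
 * into the monitor or shadow tables every time. */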
3348 /* Don't try to update the monitor table if it doesn't exist */
3349 if ( shadow_mode_external(d)
3350 && pagetable_get_pfn(v->arch.monitor_table) == 0 )
3351 return;
3353 #if (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS == 4)
3355 /* For PV, one l4e points at the guest l4, one points at the shadow
3356 * l4. No maintenance required.
3357 * For HVM, just need to update the l4e that points to the shadow l4. */
3359 if ( shadow_mode_external(d) )
3361 /* Use the linear map if we can; otherwise make a new mapping */
3362 if ( v == current )
3364 __linear_l4_table[l4_linear_offset(SH_LINEAR_PT_VIRT_START)] =
3365 l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
3366 __PAGE_HYPERVISOR);
3368 else
3370 l4_pgentry_t *ml4e;
3371 ml4e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
3372 ml4e[l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
3373 l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
3374 __PAGE_HYPERVISOR);
3375 sh_unmap_domain_page(ml4e);
3379 #elif (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS == 3)
3381 /* This case only exists in HVM. To give ourselves a linear map of the
3382 * shadows, we need to extend a PAE shadow to 4 levels. We do this by
3383 * having a monitor l3 in slot 0 of the monitor l4 table, and
3384 * copying the PAE l3 entries into it. Then, by having the monitor l4e
3385 * for shadow pagetables also point to the monitor l4, we can use it
3386 * to access the shadows. */
3388 if ( shadow_mode_external(d) )
3390 /* Install copies of the shadow l3es into the monitor l3 table.
3391 * The monitor l3 table is hooked into slot 0 of the monitor
3392 * l4 table, so we use l3 linear indices 0 to 3 */
3393 shadow_l3e_t *sl3e;
3394 l3_pgentry_t *ml3e;
3395 mfn_t l3mfn;
3396 int i;
3398 /* Use linear mappings if we can; otherwise make new mappings */
3399 if ( v == current )
3401 ml3e = __linear_l3_table;
3402 l3mfn = _mfn(l4e_get_pfn(__linear_l4_table[0]));
3404 else
3406 l4_pgentry_t *ml4e;
3407 ml4e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
3408 ASSERT(l4e_get_flags(ml4e[0]) & _PAGE_PRESENT);
3409 l3mfn = _mfn(l4e_get_pfn(ml4e[0]));
3410 ml3e = sh_map_domain_page(l3mfn);
3411 sh_unmap_domain_page(ml4e);
3414 #if GUEST_PAGING_LEVELS == 2
3415 /* Shadow l3 tables are made up by update_cr3 */
3416 sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
3417 #else
3418 /* Always safe to use shadow_vtable, because it's globally mapped */
3419 sl3e = v->arch.shadow_vtable;
3420 #endif
3422 for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
3424 ml3e[i] =
3425 (shadow_l3e_get_flags(sl3e[i]) & _PAGE_PRESENT)
3426 ? l3e_from_pfn(mfn_x(shadow_l3e_get_mfn(sl3e[i])),
3427 __PAGE_HYPERVISOR)
3428 : l3e_empty();
3431 if ( v != current )
3432 sh_unmap_domain_page(ml3e);
3435 #elif CONFIG_PAGING_LEVELS == 3
3437 /* PV: need to copy the guest's l3 entries into the guest-linear-map l2
3438 * entries in the shadow, and the shadow's l3 entries into the
3439 * shadow-linear-map l2 entries in the shadow. This is safe to do
3440 * because Xen does not let guests share high-slot l2 tables between l3s,
3441 * so we know we're not treading on anyone's toes.
3443 * HVM: need to copy the shadow's l3 entries into the
3444 * shadow-linear-map l2 entries in the monitor table. This is safe
3445 * because we have one monitor table for each vcpu. The monitor's
3446 * own l3es don't need to be copied because they never change.
3447 * XXX That might change if we start stuffing things into the rest
3448 * of the monitor's virtual address space.
3449 */
3451 l2_pgentry_t *l2e, new_l2e;
3452 shadow_l3e_t *guest_l3e = NULL, *shadow_l3e;
3453 int i;
3454 int unmap_l2e = 0;
3456 #if GUEST_PAGING_LEVELS == 2
3457 /* Shadow l3 tables were built by update_cr3 */
3458 if ( shadow_mode_external(d) )
3459 shadow_l3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
3460 else
3461 BUG(); /* PV 2-on-3 is not supported yet */
3463 #else /* GUEST_PAGING_LEVELS == 3 */
3465 /* Always safe to use *_vtable, because they're globally mapped */
3466 shadow_l3e = v->arch.shadow_vtable;
3467 guest_l3e = v->arch.guest_vtable;
3469 #endif /* GUEST_PAGING_LEVELS */
3471 /* Choose where to write the entries, using linear maps if possible */
3472 if ( shadow_mode_external(d) )
3474 if ( v == current )
3476 /* From the monitor tables, it's safe to use linear maps
3477 * to update monitor l2s */
3478 l2e = __linear_l2_table + (3 * L2_PAGETABLE_ENTRIES);
3480 else
3482 /* Map the monitor table's high l2 */
3483 l3_pgentry_t *l3e;
3484 l3e = sh_map_domain_page(
3485 pagetable_get_mfn(v->arch.monitor_table));
3486 ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
3487 l2e = sh_map_domain_page(_mfn(l3e_get_pfn(l3e[3])));
3488 unmap_l2e = 1;
3489 sh_unmap_domain_page(l3e);
3492 else
3494 /* Map the shadow table's high l2 */
3495 ASSERT(shadow_l3e_get_flags(shadow_l3e[3]) & _PAGE_PRESENT);
3496 l2e = sh_map_domain_page(shadow_l3e_get_mfn(shadow_l3e[3]));
3497 unmap_l2e = 1;
3500 /* Write linear mapping of guest (only in PV, and only when
3501 * not translated). */
3502 if ( !shadow_mode_translate(d) )
3504 for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
3506 new_l2e =
3507 ((shadow_l3e_get_flags(guest_l3e[i]) & _PAGE_PRESENT)
3508 ? l2e_from_pfn(mfn_x(shadow_l3e_get_mfn(guest_l3e[i])),
3509 __PAGE_HYPERVISOR)
3510 : l2e_empty());
3511 safe_write_entry(
3512 &l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i],
3513 &new_l2e);
3517 /* Write linear mapping of shadow. */
3518 for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
3520 new_l2e = (shadow_l3e_get_flags(shadow_l3e[i]) & _PAGE_PRESENT)
3521 ? l2e_from_pfn(mfn_x(shadow_l3e_get_mfn(shadow_l3e[i])),
3522 __PAGE_HYPERVISOR)
3523 : l2e_empty();
3524 safe_write_entry(
3525 &l2e[l2_table_offset(SH_LINEAR_PT_VIRT_START) + i],
3526 &new_l2e);
3529 if ( unmap_l2e )
3530 sh_unmap_domain_page(l2e);
3533 #elif CONFIG_PAGING_LEVELS == 2
3535 /* For PV, one l2e points at the guest l2, one points at the shadow
3536 * l2. No maintenance required.
3537 * For HVM, just need to update the l2e that points to the shadow l2. */
3539 if ( shadow_mode_external(d) )
3541 /* Use the linear map if we can; otherwise make a new mapping */
3542 if ( v == current )
3544 __linear_l2_table[l2_linear_offset(SH_LINEAR_PT_VIRT_START)] =
3545 l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
3546 __PAGE_HYPERVISOR);
3548 else
3550 l2_pgentry_t *ml2e;
3551 ml2e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
3552 ml2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
3553 l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
3554 __PAGE_HYPERVISOR);
3555 sh_unmap_domain_page(ml2e);
3559 #else
3560 #error this should not happen
3561 #endif
3565 // XXX -- should this be in this file?
3566 // Or should it be moved to shadow-common.c?
3567 //
3568 #if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
3569 void sh_pae_recopy(struct domain *d)
3570 /* Called whenever we write to the l3 entries of a PAE pagetable which
3571 * is currently in use. Each vcpu that is using the table needs to
3572 * resync its copies of the l3s in linear maps and any low-memory
3573 * copies it might have made for fitting into a 32-bit CR3.
3574 * Since linear maps are also resynced when we change CR3, we don't
3575 * need to worry about changes to PAE l3es that are not currently in use. */
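/* In outline, the loop below visits every vcpu of the domain, skips those
 * without pae_flip_pending set, refreshes that vcpu's linear-map copies via
 * sh_update_linear_entries(), refreshes its HVM low-memory l3 (or, for PV,
 * any below-4GB copy found at v->arch.cr3), clears the pending flag, and
 * finally flushes the TLBs of the affected physical CPUs. */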
3577 struct vcpu *v;
3578 cpumask_t flush_mask = CPU_MASK_NONE;
3579 ASSERT(shadow_lock_is_acquired(d));
3581 for_each_vcpu(d, v)
3583 if ( !v->arch.shadow.pae_flip_pending )
3584 continue;
3586 cpu_set(v->processor, flush_mask);
3588 SHADOW_PRINTK("d=%u v=%u\n", v->domain->domain_id, v->vcpu_id);
3590 /* This vcpu has a copy in its linear maps */
3591 sh_update_linear_entries(v);
3592 if ( hvm_guest(v) )
3594 /* This vcpu has a copy in its HVM PAE l3 */
3595 v->arch.hvm_vcpu.hw_cr3 =
3596 hvm_pae_copy_root(v, v->arch.shadow_vtable,
3597 !shadow_vcpu_mode_translate(v));
3599 #if CONFIG_PAGING_LEVELS == 3
3600 else
3602 /* This vcpu might have copied the l3 to below 4GB */
3603 if ( v->arch.cr3 >> PAGE_SHIFT
3604 != pagetable_get_pfn(v->arch.shadow_table) )
3606 /* Recopy to where that copy is. */
3607 int i;
3608 l3_pgentry_t *dst, *src;
3609 dst = __va(v->arch.cr3 & ~0x1f); /* Mask cache control bits */
3610 src = v->arch.shadow_vtable;
3611 for ( i = 0 ; i < 4 ; i++ )
3612 safe_write_entry(dst + i, src + i);
3615 #endif
3616 v->arch.shadow.pae_flip_pending = 0;
3619 flush_tlb_mask(flush_mask);
3621 #endif /* (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3) */
3624 /* removes:
3625 * vcpu->arch.guest_vtable
3626 * vcpu->arch.shadow_table
3627 * vcpu->arch.shadow_vtable
3628 * Does all appropriate management/bookkeeping/refcounting/etc...
3629 */
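/* The teardown below is the inverse of the setup done in sh_update_cr3():
 * unmap guest_vtable if it was globally mapped, drop the reference held by
 * shadow_table (through the l3-subshadow path for PAE guests, including the
 * per-vcpu bookkeeping bit), and unmap shadow_vtable. */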
3630 static void
3631 sh_detach_old_tables(struct vcpu *v)
3633 struct domain *d = v->domain;
3634 mfn_t smfn;
3636 ////
3637 //// vcpu->arch.guest_vtable
3638 ////
3639 if ( v->arch.guest_vtable )
3641 #if GUEST_PAGING_LEVELS == 4
3642 if ( shadow_mode_external(d) || shadow_mode_translate(d) )
3643 sh_unmap_domain_page_global(v->arch.guest_vtable);
3644 #elif GUEST_PAGING_LEVELS == 3
3645 if ( 1 || shadow_mode_external(d) || shadow_mode_translate(d) )
3646 sh_unmap_domain_page_global(v->arch.guest_vtable);
3647 #elif GUEST_PAGING_LEVELS == 2
3648 if ( shadow_mode_external(d) || shadow_mode_translate(d) )
3649 sh_unmap_domain_page_global(v->arch.guest_vtable);
3650 #endif
3651 v->arch.guest_vtable = NULL;
3654 ////
3655 //// vcpu->arch.shadow_table
3656 ////
3657 smfn = pagetable_get_mfn(v->arch.shadow_table);
3658 if ( mfn_x(smfn) )
3660 ASSERT(v->arch.shadow_vtable);
3662 #if GUEST_PAGING_LEVELS == 3
3663 // PAE guests do not (necessarily) use an entire page for their
3664 // 4-entry L3s, so we have to deal with them specially.
3665 //
3666 sh_put_ref_l3_subshadow(v, v->arch.shadow_vtable, smfn);
3667 #else
3668 sh_put_ref(v, smfn, 0);
3669 #endif
3671 #if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
3673 struct pae_l3_bookkeeping *info =
3674 sl3p_to_info(v->arch.shadow_vtable);
3675 ASSERT(test_bit(v->vcpu_id, &info->vcpus));
3676 clear_bit(v->vcpu_id, &info->vcpus);
3678 #endif
3679 v->arch.shadow_table = pagetable_null();
3682 ////
3683 //// vcpu->arch.shadow_vtable
3684 ////
3685 if ( (shadow_mode_external(v->domain) || (GUEST_PAGING_LEVELS == 3)) &&
3686 v->arch.shadow_vtable )
3688 // Q: why does this need to use (un)map_domain_page_*global* ?
3689 /* A: so sh_update_linear_entries can operate on other vcpus */
3690 sh_unmap_domain_page_global(v->arch.shadow_vtable);
3691 v->arch.shadow_vtable = NULL;
3695 static void
3696 sh_update_cr3(struct vcpu *v)
3697 /* Updates vcpu->arch.shadow_table after the guest has changed CR3.
3698 * Paravirtual guests should set v->arch.guest_table (and guest_table_user,
3699 * if appropriate).
3700 * HVM guests should also make sure hvm_get_guest_ctrl_reg(v, 3) returns the new value...
3701 */
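/* In outline, the steps below are: sanity-check the guest table supplied by
 * the HVM code, pick the kernel or user guest table, detach any old tables,
 * (re)map guest_vtable, look up or create the top-level shadow and move it
 * to the front of the toplevel list, (re)map shadow_vtable, make sure the
 * Xen entries are present for PAE PV guests, take and pin a reference on
 * the new shadow, set v->arch.cr3 (and hw_cr3 for HVM), and finally fix up
 * the linear mappings. */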
3703 struct domain *d = v->domain;
3704 mfn_t gmfn, smfn;
3705 #if GUEST_PAGING_LEVELS == 3
3706 u32 guest_idx=0;
3707 #endif
3709 ASSERT(shadow_lock_is_acquired(v->domain));
3710 ASSERT(v->arch.shadow.mode);
3712 ////
3713 //// vcpu->arch.guest_table is already set
3714 ////
3716 #ifndef NDEBUG
3717 /* Double-check that the HVM code has sent us a sane guest_table */
3718 if ( hvm_guest(v) )
3720 gfn_t gfn;
3722 ASSERT(shadow_mode_external(d));
3724 // Is paging enabled on this vcpu?
3725 if ( shadow_vcpu_mode_translate(v) )
3727 gfn = _gfn(paddr_to_pfn(hvm_get_guest_ctrl_reg(v, 3)));
3728 gmfn = vcpu_gfn_to_mfn(v, gfn);
3729 ASSERT(valid_mfn(gmfn));
3730 ASSERT(pagetable_get_pfn(v->arch.guest_table) == mfn_x(gmfn));
3732 else
3734 /* Paging disabled: guest_table points at (part of) p2m */
3735 #if SHADOW_PAGING_LEVELS != 3 /* in 3-on-4, guest-table is in slot 0 of p2m */
3736 /* For everything else, they should be the same */
3737 ASSERT(v->arch.guest_table.pfn == d->arch.phys_table.pfn);
3738 #endif
3741 #endif
3743 SHADOW_PRINTK("d=%u v=%u guest_table=%05lx\n",
3744 d->domain_id, v->vcpu_id,
3745 (unsigned long)pagetable_get_pfn(v->arch.guest_table));
3747 #if GUEST_PAGING_LEVELS == 4
3748 if ( !(v->arch.flags & TF_kernel_mode) )
3749 gmfn = pagetable_get_mfn(v->arch.guest_table_user);
3750 else
3751 #endif
3752 gmfn = pagetable_get_mfn(v->arch.guest_table);
3754 sh_detach_old_tables(v);
3756 if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
3758 ASSERT(v->arch.cr3 == 0);
3759 return;
3762 ////
3763 //// vcpu->arch.guest_vtable
3764 ////
3765 #if GUEST_PAGING_LEVELS == 4
3766 if ( shadow_mode_external(d) || shadow_mode_translate(d) )
3767 v->arch.guest_vtable = sh_map_domain_page_global(gmfn);
3768 else
3769 v->arch.guest_vtable = __linear_l4_table;
3770 #elif GUEST_PAGING_LEVELS == 3
3771 if ( shadow_mode_external(d) )
3773 if ( shadow_vcpu_mode_translate(v) )
3774 /* Paging enabled: find where in the page the l3 table is */
3775 guest_idx = guest_index((void *)hvm_get_guest_ctrl_reg(v, 3));
3776 else
3777 /* Paging disabled: l3 is at the start of a page (in the p2m) */
3778 guest_idx = 0;
3780 // Ignore the low 2 bits of guest_idx -- they are really just
3781 // cache control.
3782 guest_idx &= ~3;
3784 // XXX - why does this need a global map?
3785 v->arch.guest_vtable =
3786 (guest_l3e_t *)sh_map_domain_page_global(gmfn) + guest_idx;
3788 else
3789 v->arch.guest_vtable = sh_map_domain_page_global(gmfn);
3790 #elif GUEST_PAGING_LEVELS == 2
3791 if ( shadow_mode_external(d) || shadow_mode_translate(d) )
3792 v->arch.guest_vtable = sh_map_domain_page_global(gmfn);
3793 else
3794 v->arch.guest_vtable = __linear_l2_table;
3795 #else
3796 #error this should never happen
3797 #endif
3799 #if 0
3800 printk("%s %s %d gmfn=%05lx guest_vtable=%p\n",
3801 __func__, __FILE__, __LINE__, gmfn, v->arch.guest_vtable);
3802 #endif
3804 ////
3805 //// vcpu->arch.shadow_table
3806 ////
3807 smfn = get_shadow_status(v, gmfn, PGC_SH_guest_root_type);
3808 if ( valid_mfn(smfn) )
3810 /* Pull this root shadow to the front of the list of roots. */
3811 list_del(&mfn_to_page(smfn)->list);
3812 list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
3814 else
3816 /* This guest MFN is a pagetable. Must revoke write access. */
3817 if ( shadow_remove_write_access(v, gmfn, GUEST_PAGING_LEVELS, 0)
3818 != 0 )
3819 flush_tlb_mask(d->domain_dirty_cpumask);
3820 /* Make sure there's enough free shadow memory. */
3821 shadow_prealloc(d, SHADOW_MAX_ORDER);
3822 /* Shadow the page. */
3823 smfn = sh_make_shadow(v, gmfn, PGC_SH_guest_root_type);
3824 list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
3826 ASSERT(valid_mfn(smfn));
3827 v->arch.shadow_table = pagetable_from_mfn(smfn);
3829 #if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
3830 /* Once again OK to unhook entries from this table if we see fork/exit */
3831 ASSERT(sh_mfn_is_a_page_table(gmfn));
3832 mfn_to_page(gmfn)->shadow_flags &= ~SHF_unhooked_mappings;
3833 #endif
3836 ////
3837 //// vcpu->arch.shadow_vtable
3838 ////
3839 if ( shadow_mode_external(d) )
3841 #if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
3842 mfn_t adjusted_smfn = smfn;
3843 u32 shadow_idx = shadow_l3_index(&adjusted_smfn, guest_idx);
3844 // Q: why does this need to use (un)map_domain_page_*global* ?
3845 v->arch.shadow_vtable =
3846 (shadow_l3e_t *)sh_map_domain_page_global(adjusted_smfn) +
3847 shadow_idx;
3848 #else
3849 // Q: why does this need to use (un)map_domain_page_*global* ?
3850 v->arch.shadow_vtable = sh_map_domain_page_global(smfn);
3851 #endif
3853 else
3855 #if SHADOW_PAGING_LEVELS == 4
3856 v->arch.shadow_vtable = __sh_linear_l4_table;
3857 #elif GUEST_PAGING_LEVELS == 3
3858 // XXX - why does this need a global map?
3859 v->arch.shadow_vtable = sh_map_domain_page_global(smfn);
3860 #else
3861 v->arch.shadow_vtable = __sh_linear_l2_table;
3862 #endif
3865 #if (CONFIG_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
3866 // Now that shadow_vtable is in place, check that the sl3e[3] is properly
3867 // shadowed and installed in PAE PV guests...
3868 if ( !shadow_mode_external(d) &&
3869 !(shadow_l3e_get_flags(((shadow_l3e_t *)v->arch.shadow_vtable)[3]) &
3870 _PAGE_PRESENT) )
3872 sh_install_xen_entries_in_l3(v, gmfn, smfn);
3874 #endif
3876 ////
3877 //// Take a ref to the new shadow table, and pin it.
3878 ////
3879 //
3880 // This ref is logically "held" by v->arch.shadow_table entry itself.
3881 // Release the old ref.
3882 //
3883 #if GUEST_PAGING_LEVELS == 3
3884 // PAE guests do not (necessarily) use an entire page for their
3885 // 4-entry L3s, so we have to deal with them specially.
3886 //
3887 // XXX - might want to revisit this if/when we do multiple compilation for
3888 // HVM-vs-PV guests, as PAE PV guests could get away without doing
3889 // subshadows.
3890 //
3891 sh_get_ref_l3_subshadow(v->arch.shadow_vtable, smfn);
3892 sh_pin_l3_subshadow(v->arch.shadow_vtable, smfn);
3893 #else
3894 sh_get_ref(smfn, 0);
3895 sh_pin(smfn);
3896 #endif
3898 #if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
3899 // PAE 3-on-3 shadows have to keep track of which vcpus are using
3900 // which l3 subshadow, in order to handle the SHADOW_SET_L3PAE_RECOPY
3901 // case from validate_gl3e(). Search for SHADOW_SET_L3PAE_RECOPY
3902 // in the code for more info.
3903 //
3905 struct pae_l3_bookkeeping *info =
3906 sl3p_to_info(v->arch.shadow_vtable);
3907 ASSERT(!test_bit(v->vcpu_id, &info->vcpus));
3908 set_bit(v->vcpu_id, &info->vcpus);
3910 #endif
3912 debugtrace_printk("%s cr3 gmfn=%05lx smfn=%05lx\n",
3913 __func__, gmfn, smfn);
3915 ///
3916 /// v->arch.cr3 and, if appropriate, v->arch.hvm_vcpu.hw_cr3
3917 ///
3918 if ( shadow_mode_external(d) )
3920 ASSERT(hvm_guest(v));
3921 make_cr3(v, pagetable_get_pfn(v->arch.monitor_table));
3923 #if (GUEST_PAGING_LEVELS == 2) && (SHADOW_PAGING_LEVELS != 2)
3924 #if SHADOW_PAGING_LEVELS != 3
3925 #error unexpected combination of GUEST and SHADOW paging levels
3926 #endif
3927 /* 2-on-3: make a PAE l3 table that points at the four-page l2 */
3929 mfn_t smfn = pagetable_get_mfn(v->arch.shadow_table);
3930 int i;
3932 ASSERT(v->arch.hvm_vcpu.hw_cr3 ==
3933 virt_to_maddr(v->arch.hvm_vcpu.hvm_lowmem_l3tab));
3934 for (i = 0; i < 4; i++)
3936 v->arch.hvm_vcpu.hvm_lowmem_l3tab[i] =
3937 shadow_l3e_from_mfn(_mfn(mfn_x(smfn)+i), _PAGE_PRESENT);
3940 #elif (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
3941 /* 3-on-3: copy the shadow l3 to slots that are below 4GB.
3942 * If paging is disabled, clear l3e reserved bits; otherwise
3943 * remove entries that have reserved bits set. */
3944 v->arch.hvm_vcpu.hw_cr3 =
3945 hvm_pae_copy_root(v, v->arch.shadow_vtable,
3946 !shadow_vcpu_mode_translate(v));
3947 #else
3948 /* 2-on-2 or 4-on-4: just put the shadow top-level into cr3 */
3949 v->arch.hvm_vcpu.hw_cr3 =
3950 pagetable_get_paddr(v->arch.shadow_table);
3951 #endif
3953 else // not shadow_mode_external...
3955 /* We don't support PV except guest == shadow == config levels */
3956 BUG_ON(GUEST_PAGING_LEVELS != SHADOW_PAGING_LEVELS);
3957 make_cr3(v, pagetable_get_pfn(v->arch.shadow_table));
3960 /* Fix up the linear pagetable mappings */
3961 sh_update_linear_entries(v);
3965 /**************************************************************************/
3966 /* Functions to revoke guest rights */
3968 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
3969 static int sh_guess_wrmap(struct vcpu *v, unsigned long vaddr, mfn_t gmfn)
3970 /* Look up this vaddr in the current shadow and see if it's a writeable
3971 * mapping of this gmfn. If so, remove it. Returns 1 if it worked. */
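/* This is only a heuristic: it trusts vaddr to locate a candidate l1e
 * through the shadow linear map, gives up at the first non-present
 * intermediate level, and only succeeds if that l1e is a present, writeable
 * mapping of exactly this gmfn. */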
3973 shadow_l1e_t sl1e, *sl1p;
3974 shadow_l2e_t *sl2p;
3975 #if GUEST_PAGING_LEVELS >= 3
3976 shadow_l3e_t *sl3p;
3977 #if GUEST_PAGING_LEVELS >= 4
3978 shadow_l4e_t *sl4p;
3979 #endif
3980 #endif
3981 mfn_t sl1mfn;
3984 /* Carefully look in the shadow linear map for the l1e we expect */
3985 if ( v->arch.shadow_vtable == NULL ) return 0;
3986 #if GUEST_PAGING_LEVELS >= 4
3987 sl4p = sh_linear_l4_table(v) + shadow_l4_linear_offset(vaddr);
3988 if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) )
3989 return 0;
3990 sl3p = sh_linear_l3_table(v) + shadow_l3_linear_offset(vaddr);
3991 if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) )
3992 return 0;
3993 #elif GUEST_PAGING_LEVELS == 3
3994 sl3p = ((shadow_l3e_t *) v->arch.shadow_vtable)
3995 + shadow_l3_linear_offset(vaddr);
3996 if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) )
3997 return 0;
3998 #endif
3999 sl2p = sh_linear_l2_table(v) + shadow_l2_linear_offset(vaddr);
4000 if ( !(shadow_l2e_get_flags(*sl2p) & _PAGE_PRESENT) )
4001 return 0;
4002 sl1p = sh_linear_l1_table(v) + shadow_l1_linear_offset(vaddr);
4003 sl1e = *sl1p;
4004 if ( ((shadow_l1e_get_flags(sl1e) & (_PAGE_PRESENT|_PAGE_RW))
4005 != (_PAGE_PRESENT|_PAGE_RW))
4006 || (mfn_x(shadow_l1e_get_mfn(sl1e)) != mfn_x(gmfn)) )
4007 return 0;
4009 /* Found it! Need to remove its write permissions. */
4010 sl1mfn = shadow_l2e_get_mfn(*sl2p);
4011 sl1e = shadow_l1e_remove_flags(sl1e, _PAGE_RW);
4012 shadow_set_l1e(v, sl1p, sl1e, sl1mfn);
4013 return 1;
4015 #endif
4017 int sh_remove_write_access(struct vcpu *v, mfn_t sl1mfn, mfn_t readonly_mfn)
4018 /* Excises all writeable mappings to readonly_mfn from this l1 shadow table */
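/* The walk below blanks every present, writeable l1e that maps
 * readonly_mfn, and stops early (returning nonzero) as soon as the target
 * page's type count drops to zero, so the caller can stop searching. */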
4020 shadow_l1e_t *sl1e;
4021 int done = 0;
4022 int flags;
4024 SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done,
4026 flags = shadow_l1e_get_flags(*sl1e);
4027 if ( (flags & _PAGE_PRESENT)
4028 && (flags & _PAGE_RW)
4029 && (mfn_x(shadow_l1e_get_mfn(*sl1e)) == mfn_x(readonly_mfn)) )
4031 shadow_set_l1e(v, sl1e, shadow_l1e_empty(), sl1mfn);
4032 if ( (mfn_to_page(readonly_mfn)->u.inuse.type_info
4033 & PGT_count_mask) == 0 )
4034 /* This breaks us cleanly out of the FOREACH macro */
4035 done = 1;
4037 });
4038 return done;
4042 int sh_remove_all_mappings(struct vcpu *v, mfn_t sl1mfn, mfn_t target_mfn)
4043 /* Excises all mappings to guest frame from this shadow l1 table */
4045 shadow_l1e_t *sl1e;
4046 int done = 0;
4047 int flags;
4049 SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done,
4051 flags = shadow_l1e_get_flags(*sl1e);
4052 if ( (flags & _PAGE_PRESENT)
4053 && (mfn_x(shadow_l1e_get_mfn(*sl1e)) == mfn_x(target_mfn)) )
4055 shadow_set_l1e(v, sl1e, shadow_l1e_empty(), sl1mfn);
4056 if ( (mfn_to_page(target_mfn)->count_info & PGC_count_mask) == 0 )
4057 /* This breaks us cleanly out of the FOREACH macro */
4058 done = 1;
4060 });
4061 return done;
4064 /**************************************************************************/
4065 /* Functions to excise all pointers to shadows from higher-level shadows. */
4067 void sh_clear_shadow_entry(struct vcpu *v, void *ep, mfn_t smfn)
4068 /* Blank out a single shadow entry */
4070 switch (mfn_to_page(smfn)->count_info & PGC_SH_type_mask)
4072 case PGC_SH_l1_shadow:
4073 shadow_set_l1e(v, ep, shadow_l1e_empty(), smfn); break;
4074 case PGC_SH_l2_shadow:
4075 #if GUEST_PAGING_LEVELS == 3
4076 case PGC_SH_l2h_shadow:
4077 #endif
4078 shadow_set_l2e(v, ep, shadow_l2e_empty(), smfn); break;
4079 #if GUEST_PAGING_LEVELS >= 3
4080 case PGC_SH_l3_shadow:
4081 shadow_set_l3e(v, ep, shadow_l3e_empty(), smfn); break;
4082 #if GUEST_PAGING_LEVELS >= 4
4083 case PGC_SH_l4_shadow:
4084 shadow_set_l4e(v, ep, shadow_l4e_empty(), smfn); break;
4085 #endif
4086 #endif
4087 default: BUG(); /* Called with the wrong kind of shadow. */
4091 int sh_remove_l1_shadow(struct vcpu *v, mfn_t sl2mfn, mfn_t sl1mfn)
4092 /* Remove all mappings of this l1 shadow from this l2 shadow */
4094 shadow_l2e_t *sl2e;
4095 int done = 0;
4096 int flags;
4097 #if GUEST_PAGING_LEVELS != 4
4098 int xen_mappings = !shadow_mode_external(v->domain);
4099 #endif
4101 SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, done, xen_mappings,
4103 flags = shadow_l2e_get_flags(*sl2e);
4104 if ( (flags & _PAGE_PRESENT)
4105 && (mfn_x(shadow_l2e_get_mfn(*sl2e)) == mfn_x(sl1mfn)) )
4107 shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
4108 if ( (mfn_to_page(sl1mfn)->count_info & PGC_SH_type_mask) == 0 )
4109 /* This breaks us cleanly out of the FOREACH macro */
4110 done = 1;
4112 });
4113 return done;
4116 #if GUEST_PAGING_LEVELS >= 3
4117 int sh_remove_l2_shadow(struct vcpu *v, mfn_t sl3mfn, mfn_t sl2mfn)
4118 /* Remove all mappings of this l2 shadow from this l3 shadow */
4120 shadow_l3e_t *sl3e;
4121 int done = 0;
4122 int flags;
4124 SHADOW_FOREACH_L3E(sl3mfn, sl3e, 0, done,
4126 flags = shadow_l3e_get_flags(*sl3e);
4127 if ( (flags & _PAGE_PRESENT)
4128 && (mfn_x(shadow_l3e_get_mfn(*sl3e)) == mfn_x(sl2mfn)) )
4130 shadow_set_l3e(v, sl3e, shadow_l3e_empty(), sl3mfn);
4131 if ( (mfn_to_page(sl2mfn)->count_info & PGC_SH_type_mask) == 0 )
4132 /* This breaks us cleanly out of the FOREACH macro */
4133 done = 1;
4135 });
4136 return done;
4139 #if GUEST_PAGING_LEVELS >= 4
4140 int sh_remove_l3_shadow(struct vcpu *v, mfn_t sl4mfn, mfn_t sl3mfn)
4141 /* Remove all mappings of this l3 shadow from this l4 shadow */
4143 shadow_l4e_t *sl4e;
4144 int done = 0;
4145 int flags, xen_mappings = !shadow_mode_external(v->domain);
4147 SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, done, xen_mappings,
4149 flags = shadow_l4e_get_flags(*sl4e);
4150 if ( (flags & _PAGE_PRESENT)
4151 && (mfn_x(shadow_l4e_get_mfn(*sl4e)) == mfn_x(sl3mfn)) )
4153 shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn);
4154 if ( (mfn_to_page(sl3mfn)->count_info & PGC_SH_type_mask) == 0 )
4155 /* This breaks us cleanly out of the FOREACH macro */
4156 done = 1;
4158 });
4159 return done;
4161 #endif /* 64bit guest */
4162 #endif /* PAE guest */
4164 /**************************************************************************/
4165 /* Handling HVM guest writes to pagetables */
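/* Three emulation paths land here: plain writes, cmpxchg and cmpxchg8b.
 * Each maps its destination with emulate_map_dest(), performs the access,
 * has the shadow code revalidate the bytes it touched, and watches for
 * writes of zero as a hint (check_for_early_unshadow()) that the guest may
 * be discarding this pagetable. */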
4167 /* Check that the user is allowed to perform this write.
4168 * Returns a mapped pointer to write to, and the mfn it's on,
4169 * or NULL for error. */
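/* The permission test below mirrors what the MMU would do for the guest's
 * own access: the accumulated walk flags must include _PAGE_PRESENT and
 * _PAGE_RW, plus _PAGE_USER if the emulated code was running in ring 3;
 * otherwise the fault is handed back to the guest via propagate_fault. */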
4170 static inline void * emulate_map_dest(struct vcpu *v,
4171 unsigned long vaddr,
4172 struct x86_emulate_ctxt *ctxt,
4173 mfn_t *mfnp)
4175 walk_t gw;
4176 u32 flags;
4177 gfn_t gfn;
4178 mfn_t mfn;
4180 guest_walk_tables(v, vaddr, &gw, 1);
4181 flags = accumulate_guest_flags(v, &gw);
4182 gfn = guest_l1e_get_gfn(gw.eff_l1e);
4183 mfn = vcpu_gfn_to_mfn(v, gfn);
4184 sh_audit_gw(v, &gw);
4185 unmap_walk(v, &gw);
4187 if ( !(flags & _PAGE_PRESENT)
4188 || !(flags & _PAGE_RW)
4189 || (!(flags & _PAGE_USER) && ring_3(ctxt->regs)) )
4191 /* This write would have faulted even on bare metal */
4192 v->arch.shadow.propagate_fault = 1;
4193 return NULL;
4196 if ( !valid_mfn(mfn) )
4198 /* Attempted a write to a bad gfn. This should never happen:
4199 * after all, we're here because this write is to a page table. */
4200 BUG();
4203 ASSERT(sh_mfn_is_a_page_table(mfn));
4204 *mfnp = mfn;
4205 return sh_map_domain_page(mfn) + (vaddr & ~PAGE_MASK);
4208 int
4209 sh_x86_emulate_write(struct vcpu *v, unsigned long vaddr, void *src,
4210 u32 bytes, struct x86_emulate_ctxt *ctxt)
4212 ASSERT(shadow_lock_is_acquired(v->domain));
4213 while ( bytes > 0 )
4215 mfn_t mfn;
4216 int bytes_on_page;
4217 void *addr;
4219 bytes_on_page = PAGE_SIZE - (vaddr & ~PAGE_MASK);
4220 if ( bytes_on_page > bytes )
4221 bytes_on_page = bytes;
4223 if ( (addr = emulate_map_dest(v, vaddr, ctxt, &mfn)) == NULL )
4224 return X86EMUL_PROPAGATE_FAULT;
4225 memcpy(addr, src, bytes_on_page);
4226 shadow_validate_guest_pt_write(v, mfn, addr, bytes_on_page);
4227 bytes -= bytes_on_page;
4228 /* If we are writing zeros to this page, might want to unshadow */
4229 if ( *(u8 *)addr == 0 )
4230 check_for_early_unshadow(v, mfn);
4231 sh_unmap_domain_page(addr);
4233 shadow_audit_tables(v);
4234 return X86EMUL_CONTINUE;
4237 int
4238 sh_x86_emulate_cmpxchg(struct vcpu *v, unsigned long vaddr,
4239 unsigned long old, unsigned long new,
4240 unsigned int bytes, struct x86_emulate_ctxt *ctxt)
4242 mfn_t mfn;
4243 void *addr;
4244 unsigned long prev;
4245 int rv = X86EMUL_CONTINUE;
4247 ASSERT(shadow_lock_is_acquired(v->domain));
4248 ASSERT(bytes <= sizeof (unsigned long));
4250 if ( (addr = emulate_map_dest(v, vaddr, ctxt, &mfn)) == NULL )
4251 return X86EMUL_PROPAGATE_FAULT;
4253 switch (bytes)
4255 case 1: prev = cmpxchg(((u8 *)addr), old, new); break;
4256 case 2: prev = cmpxchg(((u16 *)addr), old, new); break;
4257 case 4: prev = cmpxchg(((u32 *)addr), old, new); break;
4258 case 8: prev = cmpxchg(((u64 *)addr), old, new); break;
4259 default:
4260 SHADOW_PRINTK("cmpxchg of size %i is not supported\n", bytes);
4261 prev = ~old;
4264 if ( (prev == old) )
4265 shadow_validate_guest_pt_write(v, mfn, addr, bytes);
4266 else
4267 rv = X86EMUL_CMPXCHG_FAILED;
4269 SHADOW_DEBUG(EMULATE, "va %#lx was %#lx expected %#lx"
4270 " wanted %#lx now %#lx bytes %u\n",
4271 vaddr, prev, old, new, *(unsigned long *)addr, bytes);
4273 /* If we are writing zeros to this page, might want to unshadow */
4274 if ( *(u8 *)addr == 0 )
4275 check_for_early_unshadow(v, mfn);
4277 sh_unmap_domain_page(addr);
4278 shadow_audit_tables(v);
4279 check_for_early_unshadow(v, mfn);
4280 return rv;
4283 int
4284 sh_x86_emulate_cmpxchg8b(struct vcpu *v, unsigned long vaddr,
4285 unsigned long old_lo, unsigned long old_hi,
4286 unsigned long new_lo, unsigned long new_hi,
4287 struct x86_emulate_ctxt *ctxt)
4289 mfn_t mfn;
4290 void *addr;
4291 u64 old, new, prev;
4292 int rv = X86EMUL_CONTINUE;
4294 ASSERT(shadow_lock_is_acquired(v->domain));
4296 if ( (addr = emulate_map_dest(v, vaddr, ctxt, &mfn)) == NULL )
4297 return X86EMUL_PROPAGATE_FAULT;
4299 old = (((u64) old_hi) << 32) | (u64) old_lo;
4300 new = (((u64) new_hi) << 32) | (u64) new_lo;
4301 prev = cmpxchg(((u64 *)addr), old, new);
4303 if ( (prev == old) )
4304 shadow_validate_guest_pt_write(v, mfn, addr, 8);
4305 else
4306 rv = X86EMUL_CMPXCHG_FAILED;
4308 /* If we are writing zeros to this page, might want to unshadow */
4309 if ( *(u8 *)addr == 0 )
4310 check_for_early_unshadow(v, mfn);
4312 sh_unmap_domain_page(addr);
4313 shadow_audit_tables(v);
4314 check_for_early_unshadow(v, mfn);
4315 return rv;
4319 /**************************************************************************/
4320 /* Audit tools */
4322 #if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES
4324 #define AUDIT_FAIL(_level, _fmt, _a...) do { \
4325 printk("Shadow %u-on-%u audit failed at level %i, index %i\n" \
4326 "gl" #_level "mfn = %" SH_PRI_mfn \
4327 " sl" #_level "mfn = %" SH_PRI_mfn \
4328 " &gl" #_level "e = %p &sl" #_level "e = %p" \
4329 " gl" #_level "e = %" SH_PRI_gpte \
4330 " sl" #_level "e = %" SH_PRI_pte "\nError: " _fmt "\n", \
4331 GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS, \
4332 _level, guest_index(gl ## _level ## e), \
4333 mfn_x(gl ## _level ## mfn), mfn_x(sl ## _level ## mfn), \
4334 gl ## _level ## e, sl ## _level ## e, \
4335 gl ## _level ## e->l ## _level, sl ## _level ## e->l ## _level, \
4336 ##_a); \
4337 BUG(); \
4338 done = 1; \
4339 } while (0)
4342 static char * sh_audit_flags(struct vcpu *v, int level,
4343 int gflags, int sflags)
4344 /* Common code for auditing flag bits */
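/* The general rule being checked is that a shadow entry must not grant
 * rights that the corresponding guest entry withholds (some bits, such as
 * user and NX, must match exactly), and that any accessed/dirty information
 * the shadow reports must already be visible in the guest entry. */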
4346 if ( (sflags & _PAGE_PRESENT) && !(gflags & _PAGE_PRESENT) )
4347 return "shadow is present but guest is not present";
4348 if ( (sflags & _PAGE_GLOBAL) && !hvm_guest(v) )
4349 return "global bit set in PV shadow";
4350 if ( (level == 1 || (level == 2 && (gflags & _PAGE_PSE)))
4351 && ((sflags & _PAGE_DIRTY) && !(gflags & _PAGE_DIRTY)) )
4352 return "dirty bit not propagated";
4353 if ( level == 2 && (sflags & _PAGE_PSE) )
4354 return "PS bit set in shadow";
4355 #if SHADOW_PAGING_LEVELS == 3
4356 if ( level == 3 ) return NULL; /* All the other bits are blank in PAE l3 */
4357 #endif
4358 if ( (sflags & _PAGE_USER) != (gflags & _PAGE_USER) )
4359 return "user/supervisor bit does not match";
4360 if ( (sflags & _PAGE_NX_BIT) != (gflags & _PAGE_NX_BIT) )
4361 return "NX bit does not match";
4362 if ( (sflags & _PAGE_RW) && !(gflags & _PAGE_RW) )
4363 return "shadow grants write access but guest does not";
4364 if ( (sflags & _PAGE_ACCESSED) && !(gflags & _PAGE_ACCESSED) )
4365 return "accessed bit not propagated";
4366 return NULL;
4369 static inline mfn_t
4370 audit_gfn_to_mfn(struct vcpu *v, gfn_t gfn, mfn_t gmfn)
4371 /* Convert this gfn to an mfn in the manner appropriate for the
4372 * guest pagetable it's used in (gmfn) */
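/* Without translation a gfn already is an mfn.  With translation, a guest
 * "pagetable" that is not a writable page is taken to be part of the p2m of
 * a paging-disabled guest, whose entries hold frame numbers directly; only
 * ordinary guest pagetables are put through sh_gfn_to_mfn(). */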
4374 if ( !shadow_mode_translate(v->domain) )
4375 return _mfn(gfn_x(gfn));
4377 if ( (mfn_to_page(gmfn)->u.inuse.type_info & PGT_type_mask)
4378 != PGT_writable_page )
4379 return _mfn(gfn_x(gfn)); /* This is a paging-disabled shadow */
4380 else
4381 return sh_gfn_to_mfn(v->domain, gfn_x(gfn));
4385 int sh_audit_l1_table(struct vcpu *v, mfn_t sl1mfn, mfn_t x)
4387 guest_l1e_t *gl1e, *gp;
4388 shadow_l1e_t *sl1e;
4389 mfn_t mfn, gmfn, gl1mfn;
4390 gfn_t gfn;
4391 char *s;
4392 int done = 0;
4394 /* Follow the backpointer */
4395 gl1mfn = _mfn(mfn_to_page(sl1mfn)->u.inuse.type_info);
4396 gl1e = gp = sh_map_domain_page(gl1mfn);
4397 SHADOW_FOREACH_L1E(sl1mfn, sl1e, &gl1e, done, {
4399 s = sh_audit_flags(v, 1, guest_l1e_get_flags(*gl1e),
4400 shadow_l1e_get_flags(*sl1e));
4401 if ( s ) AUDIT_FAIL(1, "%s", s);
4403 if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS )
4405 gfn = guest_l1e_get_gfn(*gl1e);
4406 mfn = shadow_l1e_get_mfn(*sl1e);
4407 gmfn = audit_gfn_to_mfn(v, gfn, gl1mfn);
4408 if ( mfn_x(gmfn) != mfn_x(mfn) )
4409 AUDIT_FAIL(1, "bad translation: gfn %" SH_PRI_gfn
4410 " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn "\n",
4411 gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn));
4413 });
4414 sh_unmap_domain_page(gp);
4415 return done;
4418 int sh_audit_fl1_table(struct vcpu *v, mfn_t sl1mfn, mfn_t x)
4420 guest_l1e_t *gl1e, e;
4421 shadow_l1e_t *sl1e;
4422 mfn_t gl1mfn = _mfn(INVALID_MFN);
4423 int f;
4424 int done = 0;
4426 /* fl1 has no useful backpointer: all we can check are flags */
4427 e = guest_l1e_from_gfn(_gfn(0), 0); gl1e = &e; /* Needed for macro */
4428 SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, done, {
4429 f = shadow_l1e_get_flags(*sl1e);
4430 f &= ~(_PAGE_AVAIL0|_PAGE_AVAIL1|_PAGE_AVAIL2);
4431 if ( !(f == 0
4432 || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
4433 _PAGE_ACCESSED|_PAGE_DIRTY)
4434 || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY)) )
4435 AUDIT_FAIL(1, "fl1e has bad flags");
4436 });
4437 return 0;
4440 int sh_audit_l2_table(struct vcpu *v, mfn_t sl2mfn, mfn_t x)
4442 guest_l2e_t *gl2e, *gp;
4443 shadow_l2e_t *sl2e;
4444 mfn_t mfn, gmfn, gl2mfn;
4445 gfn_t gfn;
4446 char *s;
4447 int done = 0;
4448 #if GUEST_PAGING_LEVELS != 4
4449 int xen_mappings = !shadow_mode_external(v->domain);
4450 #endif
4452 /* Follow the backpointer */
4453 gl2mfn = _mfn(mfn_to_page(sl2mfn)->u.inuse.type_info);
4454 gl2e = gp = sh_map_domain_page(gl2mfn);
4455 SHADOW_FOREACH_L2E(sl2mfn, sl2e, &gl2e, done, xen_mappings, {
4457 s = sh_audit_flags(v, 2, guest_l2e_get_flags(*gl2e),
4458 shadow_l2e_get_flags(*sl2e));
4459 if ( s ) AUDIT_FAIL(2, "%s", s);
4461 if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS )
4463 gfn = guest_l2e_get_gfn(*gl2e);
4464 mfn = shadow_l2e_get_mfn(*sl2e);
4465 gmfn = (guest_l2e_get_flags(*gl2e) & _PAGE_PSE)
4466 ? get_fl1_shadow_status(v, gfn)
4467 : get_shadow_status(v, audit_gfn_to_mfn(v, gfn, gl2mfn),
4468 PGC_SH_l1_shadow);
4469 if ( mfn_x(gmfn) != mfn_x(mfn) )
4470 AUDIT_FAIL(2, "bad translation: gfn %" SH_PRI_gfn
4471 " (--> %" SH_PRI_mfn ")"
4472 " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn "\n",
4473 gfn_x(gfn),
4474 (guest_l2e_get_flags(*gl2e) & _PAGE_PSE) ? 0
4475 : mfn_x(audit_gfn_to_mfn(v, gfn, gl2mfn)),
4476 mfn_x(gmfn), mfn_x(mfn));
4478 });
4479 sh_unmap_domain_page(gp);
4480 return 0;
4483 #if GUEST_PAGING_LEVELS >= 3
4484 int sh_audit_l3_table(struct vcpu *v, mfn_t sl3mfn, mfn_t x)
4486 guest_l3e_t *gl3e, *gp;
4487 shadow_l3e_t *sl3e;
4488 mfn_t mfn, gmfn, gl3mfn;
4489 gfn_t gfn;
4490 char *s;
4491 int done = 0;
4493 /* Follow the backpointer */
4494 gl3mfn = _mfn(mfn_to_page(sl3mfn)->u.inuse.type_info);
4495 gl3e = gp = sh_map_domain_page(gl3mfn);
4496 SHADOW_FOREACH_L3E(sl3mfn, sl3e, &gl3e, done, {
4498 s = sh_audit_flags(v, 3, guest_l3e_get_flags(*gl3e),
4499 shadow_l3e_get_flags(*sl3e));
4500 if ( s ) AUDIT_FAIL(3, "%s", s);
4502 if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS )
4504 gfn = guest_l3e_get_gfn(*gl3e);
4505 mfn = shadow_l3e_get_mfn(*sl3e);
4506 gmfn = get_shadow_status(v, audit_gfn_to_mfn(v, gfn, gl3mfn),
4507 (GUEST_PAGING_LEVELS == 3
4508 && !shadow_mode_external(v->domain)
4509 && (guest_index(gl3e) % 4) == 3)
4510 ? PGC_SH_l2h_pae_shadow
4511 : PGC_SH_l2_shadow);
4512 if ( mfn_x(gmfn) != mfn_x(mfn) )
4513 AUDIT_FAIL(3, "bad translation: gfn %" SH_PRI_gfn
4514 " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn "\n",
4515 gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn));
4517 });
4518 sh_unmap_domain_page(gp);
4519 return 0;
4521 #endif /* GUEST_PAGING_LEVELS >= 3 */
4523 #if GUEST_PAGING_LEVELS >= 4
4524 int sh_audit_l4_table(struct vcpu *v, mfn_t sl4mfn, mfn_t x)
4526 guest_l4e_t *gl4e, *gp;
4527 shadow_l4e_t *sl4e;
4528 mfn_t mfn, gmfn, gl4mfn;
4529 gfn_t gfn;
4530 char *s;
4531 int done = 0;
4532 int xen_mappings = !shadow_mode_external(v->domain);
4534 /* Follow the backpointer */
4535 gl4mfn = _mfn(mfn_to_page(sl4mfn)->u.inuse.type_info);
4536 gl4e = gp = sh_map_domain_page(gl4mfn);
4537 SHADOW_FOREACH_L4E(sl4mfn, sl4e, &gl4e, done, xen_mappings,
4539 s = sh_audit_flags(v, 4, guest_l4e_get_flags(*gl4e),
4540 shadow_l4e_get_flags(*sl4e));
4541 if ( s ) AUDIT_FAIL(4, "%s", s);
4543 if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_MFNS )
4545 gfn = guest_l4e_get_gfn(*gl4e);
4546 mfn = shadow_l4e_get_mfn(*sl4e);
4547 gmfn = get_shadow_status(v, audit_gfn_to_mfn(v, gfn, gl4mfn),
4548 PGC_SH_l3_shadow);
4549 if ( mfn_x(gmfn) != mfn_x(mfn) )
4550 AUDIT_FAIL(4, "bad translation: gfn %" SH_PRI_gfn
4551 " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn "\n",
4552 gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn));
4554 });
4555 sh_unmap_domain_page(gp);
4556 return 0;
4558 #endif /* GUEST_PAGING_LEVELS >= 4 */
4561 #undef AUDIT_FAIL
4563 #endif /* Audit code */
4565 /**************************************************************************/
4566 /* Entry points into this mode of the shadow code.
4567 * This will all be mangled by the preprocessor to uniquify everything. */
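/* (Presumably each of these names acquires a per-(GUEST_PAGING_LEVELS,
 * SHADOW_PAGING_LEVELS) suffix from that mangling, so that the instances of
 * this file built for different paging modes can be linked together; the
 * two trailing fields let callers pick the instance matching the guest.) */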
4568 struct shadow_paging_mode sh_paging_mode = {
4569 .page_fault = sh_page_fault,
4570 .invlpg = sh_invlpg,
4571 .gva_to_gpa = sh_gva_to_gpa,
4572 .gva_to_gfn = sh_gva_to_gfn,
4573 .update_cr3 = sh_update_cr3,
4574 .map_and_validate_gl1e = sh_map_and_validate_gl1e,
4575 .map_and_validate_gl2e = sh_map_and_validate_gl2e,
4576 .map_and_validate_gl2he = sh_map_and_validate_gl2he,
4577 .map_and_validate_gl3e = sh_map_and_validate_gl3e,
4578 .map_and_validate_gl4e = sh_map_and_validate_gl4e,
4579 .detach_old_tables = sh_detach_old_tables,
4580 .x86_emulate_write = sh_x86_emulate_write,
4581 .x86_emulate_cmpxchg = sh_x86_emulate_cmpxchg,
4582 .x86_emulate_cmpxchg8b = sh_x86_emulate_cmpxchg8b,
4583 .make_monitor_table = sh_make_monitor_table,
4584 .destroy_monitor_table = sh_destroy_monitor_table,
4585 .guest_map_l1e = sh_guest_map_l1e,
4586 .guest_get_eff_l1e = sh_guest_get_eff_l1e,
4587 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
4588 .guess_wrmap = sh_guess_wrmap,
4589 #endif
4590 .guest_levels = GUEST_PAGING_LEVELS,
4591 .shadow_levels = SHADOW_PAGING_LEVELS,
4592 };
4594 /*
4595 * Local variables:
4596 * mode: C
4597 * c-set-style: "BSD"
4598 * c-basic-offset: 4
4599 * indent-tabs-mode: nil
4600 * End:
4601 */