
annotate xen/arch/x86/mm/shadow/multi.c @ 18479:fa2adc7fb996

x86, shadow: Fix some SHADOW_PRINTK() callers.
Signed-off-by: Yoshi Tamura <tamura.yoshiaki@lab.ntt.co.jp>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Sep 11 15:17:31 2008 +0100 (2008-09-11)
parents 74621a2add54
children c353f07bae84
rev   line source
kaf24@11310 1 /******************************************************************************
kaf24@11310 2 * arch/x86/mm/shadow/multi.c
kaf24@11310 3 *
kaf24@11310 4 * Simple, mostly-synchronous shadow page tables.
kaf24@11310 5 * Parts of this code are Copyright (c) 2006 by XenSource Inc.
kaf24@11310 6 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
kaf24@11310 7 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
kaf24@11310 8 *
kaf24@11310 9 * This program is free software; you can redistribute it and/or modify
kaf24@11310 10 * it under the terms of the GNU General Public License as published by
kaf24@11310 11 * the Free Software Foundation; either version 2 of the License, or
kaf24@11310 12 * (at your option) any later version.
kaf24@11310 13 *
kaf24@11310 14 * This program is distributed in the hope that it will be useful,
kaf24@11310 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
kaf24@11310 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
kaf24@11310 17 * GNU General Public License for more details.
kaf24@11310 18 *
kaf24@11310 19 * You should have received a copy of the GNU General Public License
kaf24@11310 20 * along with this program; if not, write to the Free Software
kaf24@11310 21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
kaf24@11310 22 */
kaf24@11310 23
kaf24@11310 24 #include <xen/config.h>
kaf24@11310 25 #include <xen/types.h>
kaf24@11310 26 #include <xen/mm.h>
kaf24@11310 27 #include <xen/trace.h>
kaf24@11310 28 #include <xen/sched.h>
kaf24@11310 29 #include <xen/perfc.h>
kaf24@11310 30 #include <xen/domain_page.h>
kaf24@11310 31 #include <asm/page.h>
kaf24@11310 32 #include <asm/current.h>
kaf24@11310 33 #include <asm/shadow.h>
kaf24@11310 34 #include <asm/flushtlb.h>
kaf24@11310 35 #include <asm/hvm/hvm.h>
keir@16197 36 #include <asm/hvm/cacheattr.h>
keir@16186 37 #include <asm/mtrr.h>
kaf24@11310 38 #include "private.h"
kaf24@11310 39 #include "types.h"
kaf24@11310 40
Tim@12069 41 /* THINGS TO DO LATER:
kaf24@11310 42 *
kaf24@11310 43 * TEARDOWN HEURISTICS
kaf24@11310 44 * Also: have a heuristic for when to destroy a previous paging-mode's
kaf24@11310 45 * shadows. When a guest is done with its start-of-day 32-bit tables
kaf24@11310 46 * and reuses the memory we want to drop those shadows. Start with
kaf24@11310 47 * shadows in a page in two modes as a hint, but beware of clever tricks
kaf24@11310 48 * like reusing a pagetable for both PAE and 64-bit during boot...
kaf24@11310 49 *
kaf24@11310 50 * PAE LINEAR MAPS
kaf24@11310 51 * Rework shadow_get_l*e() to have the option of using map_domain_page()
kaf24@11310 52 * instead of linear maps. Add appropriate unmap_l*e calls in the users.
kaf24@11310 53 * Then we can test the speed difference made by linear maps. If the
kaf24@11310 54 * map_domain_page() version is OK on PAE, we could maybe allow a lightweight
kaf24@11310 55 * l3-and-l2h-only shadow mode for PAE PV guests that would allow them
kaf24@11310 56 * to share l2h pages again.
kaf24@11310 57 *
kaf24@11310 58 * PSE disabled / PSE36
kaf24@11310 59 * We don't support any modes other than PSE enabled, PSE36 disabled.
kaf24@11310 60 * Neither of those would be hard to change, but we'd need to be able to
kaf24@11310 61 * deal with shadows made in one mode and used in another.
kaf24@11310 62 */
kaf24@11310 63
kaf24@11310 64 #define FETCH_TYPE_PREFETCH 1
kaf24@11310 65 #define FETCH_TYPE_DEMAND 2
kaf24@11310 66 #define FETCH_TYPE_WRITE 4
kaf24@11310 67 typedef enum {
kaf24@11310 68 ft_prefetch = FETCH_TYPE_PREFETCH,
kaf24@11310 69 ft_demand_read = FETCH_TYPE_DEMAND,
kaf24@11310 70 ft_demand_write = FETCH_TYPE_DEMAND | FETCH_TYPE_WRITE,
kaf24@11310 71 } fetch_type_t;
kaf24@11310 72
kaf24@11310 73 #ifdef DEBUG_TRACE_DUMP
kaf24@11310 74 static char *fetch_type_names[] = {
kaf24@11310 75 [ft_prefetch] "prefetch",
kaf24@11310 76 [ft_demand_read] "demand read",
kaf24@11310 77 [ft_demand_write] "demand write",
kaf24@11310 78 };
kaf24@11310 79 #endif
kaf24@11310 80
kaf24@11310 81 /**************************************************************************/
kaf24@11310 82 /* Hash table mapping from guest pagetables to shadows
kaf24@11310 83 *
kaf24@11310 84 * Normal case: maps the mfn of a guest page to the mfn of its shadow page.
kaf24@11310 85 * FL1's: maps the *gfn* of the start of a superpage to the mfn of a
kaf24@11310 86 * shadow L1 which maps its "splinters".
kaf24@11310 87 */
kaf24@11310 88
kaf24@11310 89 static inline mfn_t
kaf24@11310 90 get_fl1_shadow_status(struct vcpu *v, gfn_t gfn)
kaf24@11310 91 /* Look for FL1 shadows in the hash table */
kaf24@11310 92 {
Tim@12561 93 mfn_t smfn = shadow_hash_lookup(v, gfn_x(gfn), SH_type_fl1_shadow);
kaf24@11310 94 return smfn;
kaf24@11310 95 }
kaf24@11310 96
kaf24@11310 97 static inline mfn_t
kaf24@11310 98 get_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type)
kaf24@11310 99 /* Look for shadows in the hash table */
kaf24@11310 100 {
Tim@12561 101 mfn_t smfn = shadow_hash_lookup(v, mfn_x(gmfn), shadow_type);
kfraser@14595 102 perfc_incr(shadow_get_shadow_status);
kaf24@11310 103 return smfn;
kaf24@11310 104 }
kaf24@11310 105
kaf24@11310 106 static inline void
kaf24@11310 107 set_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
kaf24@11310 108 /* Put an FL1 shadow into the hash table */
kaf24@11310 109 {
kaf24@11310 110 SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
Tim@12561 111 gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn));
kaf24@11310 112
Tim@12561 113 shadow_hash_insert(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
kaf24@11310 114 }
kaf24@11310 115
kaf24@11310 116 static inline void
kaf24@11310 117 set_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
kaf24@11310 118 /* Put a shadow into the hash table */
kaf24@11310 119 {
kaf24@11310 120 struct domain *d = v->domain;
kaf24@11310 121 int res;
kaf24@11310 122
kaf24@11310 123 SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, type=%08x, smfn=%05lx\n",
kaf24@11310 124 d->domain_id, v->vcpu_id, mfn_x(gmfn),
kaf24@11310 125 shadow_type, mfn_x(smfn));
kaf24@11310 126
Tim@14962 127 /* 32-on-64 PV guests don't own their l4 pages so can't get_page them */
kfraser@14974 128 if ( !is_pv_32on64_vcpu(v) || shadow_type != SH_type_l4_64_shadow )
ack@14013 129 {
ack@14013 130 res = get_page(mfn_to_page(gmfn), d);
ack@14013 131 ASSERT(res == 1);
ack@14013 132 }
kaf24@11310 133
Tim@12561 134 shadow_hash_insert(v, mfn_x(gmfn), shadow_type, smfn);
kaf24@11310 135 }
kaf24@11310 136
kaf24@11310 137 static inline void
kaf24@11310 138 delete_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
kaf24@11310 139 /* Remove a shadow from the hash table */
kaf24@11310 140 {
kaf24@11310 141 SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
Tim@12561 142 gfn_x(gfn), SH_type_fl1_shadow, mfn_x(smfn));
Tim@12561 143 shadow_hash_delete(v, gfn_x(gfn), SH_type_fl1_shadow, smfn);
kaf24@11310 144 }
kaf24@11310 145
kaf24@11310 146 static inline void
kaf24@11310 147 delete_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
kaf24@11310 148 /* Remove a shadow from the hash table */
kaf24@11310 149 {
kaf24@11310 150 SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, type=%08x, smfn=%05lx\n",
kaf24@11310 151 v->domain->domain_id, v->vcpu_id,
kaf24@11310 152 mfn_x(gmfn), shadow_type, mfn_x(smfn));
Tim@12561 153 shadow_hash_delete(v, mfn_x(gmfn), shadow_type, smfn);
Tim@14962 154 /* 32-on-64 PV guests don't own their l4 pages; see set_shadow_status */
kfraser@14974 155 if ( !is_pv_32on64_vcpu(v) || shadow_type != SH_type_l4_64_shadow )
ack@14013 156 put_page(mfn_to_page(gmfn));
kaf24@11310 157 }
kaf24@11310 158
kaf24@11310 159 /**************************************************************************/
kaf24@11310 160 /* CPU feature support querying */
kaf24@11310 161
kaf24@11310 162 static inline int
kaf24@11310 163 guest_supports_superpages(struct vcpu *v)
kaf24@11310 164 {
kaf24@11310 165 /* The _PAGE_PSE bit must be honoured in HVM guests, whenever
Tim@15812 166 * CR4.PSE is set or the guest is in PAE or long mode.
Tim@15812 167 * It's also used in the dummy PT for vcpus with CR0.PG cleared. */
Tim@15812 168 return (is_hvm_vcpu(v) &&
Tim@15812 169 (GUEST_PAGING_LEVELS != 2
Tim@15812 170 || !hvm_paging_enabled(v)
Tim@15812 171 || (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PSE)));
kaf24@11310 172 }
kaf24@11310 173
kaf24@11310 174 static inline int
kaf24@11310 175 guest_supports_nx(struct vcpu *v)
kaf24@11310 176 {
kfraser@15105 177 if ( GUEST_PAGING_LEVELS == 2 || !cpu_has_nx )
kfraser@15105 178 return 0;
kfraser@12210 179 if ( !is_hvm_vcpu(v) )
Tim@16313 180 return cpu_has_nx;
kfraser@15105 181 return hvm_nx_enabled(v);
kaf24@11310 182 }
kaf24@11310 183
kaf24@11310 184
kaf24@11310 185 /**************************************************************************/
kaf24@11310 186 /* Functions for walking the guest page tables */
kaf24@11310 187
Tim@16313 188 /* Flags that are needed in a pagetable entry, with the sense of NX inverted */
Tim@16313 189 static uint32_t mandatory_flags(struct vcpu *v, uint32_t pfec)
Tim@16313 190 {
Tim@16313 191 static uint32_t flags[] = {
Tim@16313 192 /* I/F - Usr Wr */
Tim@16313 193 /* 0 0 0 0 */ _PAGE_PRESENT,
Tim@16313 194 /* 0 0 0 1 */ _PAGE_PRESENT|_PAGE_RW,
Tim@16313 195 /* 0 0 1 0 */ _PAGE_PRESENT|_PAGE_USER,
Tim@16313 196 /* 0 0 1 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER,
Tim@16313 197 /* 0 1 0 0 */ _PAGE_PRESENT,
Tim@16313 198 /* 0 1 0 1 */ _PAGE_PRESENT|_PAGE_RW,
Tim@16313 199 /* 0 1 1 0 */ _PAGE_PRESENT|_PAGE_USER,
Tim@16313 200 /* 0 1 1 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER,
Tim@16313 201 /* 1 0 0 0 */ _PAGE_PRESENT|_PAGE_NX_BIT,
Tim@16313 202 /* 1 0 0 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_NX_BIT,
Tim@16313 203 /* 1 0 1 0 */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
Tim@16313 204 /* 1 0 1 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
Tim@16313 205 /* 1 1 0 0 */ _PAGE_PRESENT|_PAGE_NX_BIT,
Tim@16313 206 /* 1 1 0 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_NX_BIT,
Tim@16313 207 /* 1 1 1 0 */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
Tim@16313 208 /* 1 1 1 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
Tim@16313 209 };
keir@16626 210
Tim@16313 211 /* Don't demand not-NX if the CPU wouldn't enforce it. */
Tim@16313 212 if ( !guest_supports_nx(v) )
keir@16626 213 pfec &= ~PFEC_insn_fetch;
keir@16626 214
keir@16626 215 /* Don't demand R/W if the CPU wouldn't enforce it. */
keir@16626 216 if ( is_hvm_vcpu(v) && unlikely(!hvm_wp_enabled(v))
keir@16626 217 && !(pfec & PFEC_user_mode) )
keir@16626 218 pfec &= ~PFEC_write_access;
keir@16626 219
keir@16626 220 return flags[(pfec & 0x1f) >> 1];
Tim@16313 221 }
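/* Editorial note: the following #if 0 block is a small, non-compiled sketch
 * (not part of the original file) showing how a pagefault error code is
 * reduced to the set of flags demanded by the table above. The helper name
 * example_mandatory_flags() is hypothetical. */
#if 0
static uint32_t example_mandatory_flags(struct vcpu *v)
{
    /* A user-mode write fault: the present, write and user PFEC bits are set. */
    uint32_t pfec = PFEC_page_present | PFEC_write_access | PFEC_user_mode;
    uint32_t mflags = mandatory_flags(v, pfec);
    /* The table index is (pfec & 0x1f) >> 1 == 3, so mflags is
     * _PAGE_PRESENT | _PAGE_RW | _PAGE_USER: every level of the walk must
     * be present, writable and user-accessible for this access to succeed. */
    return mflags;
}
#endif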
Tim@16313 222
keir@16321 223 /* Modify a guest pagetable entry to set the Accessed and Dirty bits.
keir@16321 224 * Returns non-zero if it actually writes to guest memory. */
keir@16321 225 static uint32_t set_ad_bits(void *guest_p, void *walk_p, int set_dirty)
Tim@16313 226 {
keir@16321 227 guest_intpte_t old, new;
keir@18454 228 int ret = 0;
keir@16321 229
keir@16321 230 old = *(guest_intpte_t *)walk_p;
keir@16321 231 new = old | _PAGE_ACCESSED | (set_dirty ? _PAGE_DIRTY : 0);
keir@16321 232 if ( old != new )
Tim@16313 233 {
keir@16321 234 /* Write the new entry into the walk, and try to write it back
keir@16321 235 * into the guest table as well. If the guest table has changed
keir@16321 236 * under our feet then leave it alone. */
keir@16321 237 *(guest_intpte_t *)walk_p = new;
keir@18454 238 if( cmpxchg(((guest_intpte_t *)guest_p), old, new) == old )
keir@18454 239 ret = 1;
keir@18454 240
keir@18454 241 /* FIXME -- this code is longer than necessary */
keir@18454 242 if(set_dirty)
keir@18454 243 TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SET_AD);
keir@18454 244 else
keir@18454 245 TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SET_A);
Tim@16313 246 }
keir@18454 247 return ret;
Tim@16313 248 }
Tim@16313 249
keir@17151 250 /* This validation is called with the shadow lock held, and after write
keir@17151 251 * permission removal. The check is then atomic, so no inconsistent content
keir@17151 252 * can be observed before the lock is released.
keir@17151 253 *
keir@17151 254 * Return 1 to indicate success and 0 for inconsistency
keir@17151 255 */
keir@17151 256 static inline uint32_t
keir@17151 257 shadow_check_gwalk(struct vcpu *v, unsigned long va, walk_t *gw)
keir@17151 258 {
keir@17151 259 struct domain *d = v->domain;
keir@17151 260 guest_l1e_t *l1p;
keir@17151 261 guest_l2e_t *l2p;
keir@17151 262 #if GUEST_PAGING_LEVELS >= 4
keir@17151 263 guest_l3e_t *l3p;
keir@17151 264 guest_l4e_t *l4p;
keir@17151 265 #endif
keir@17158 266 int mismatch = 0;
keir@17151 267
keir@17151 268 ASSERT(shadow_locked_by_me(d));
keir@17151 269
keir@17151 270 if ( gw->version ==
keir@17151 271 atomic_read(&d->arch.paging.shadow.gtable_dirty_version) )
keir@17151 272 return 1;
keir@17151 273
keir@17151 274 /* We could cache the guest page mappings from the last
keir@17151 275 * guest table walk. However, this check happens relatively
keir@17151 276 * infrequently, and the small cost of remapping the guest
keir@17151 277 * pages here is better than caching the mappings on every
keir@17151 278 * guest table walk.
keir@17151 279 *
keir@17151 280 * Also, when an inconsistency occurs, we simply return to trigger
keir@17151 281 * another fault instead of re-validating the new path, to keep
keir@17151 282 * the logic simple.
keir@17151 283 */
keir@17151 284 perfc_incr(shadow_check_gwalk);
keir@17151 285 #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
keir@17151 286 #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
keir@17151 287 l4p = (guest_l4e_t *)v->arch.paging.shadow.guest_vtable;
keir@17158 288 mismatch |= (gw->l4e.l4 != l4p[guest_l4_table_offset(va)].l4);
keir@17151 289 l3p = sh_map_domain_page(gw->l3mfn);
keir@17158 290 mismatch |= (gw->l3e.l3 != l3p[guest_l3_table_offset(va)].l3);
keir@17158 291 sh_unmap_domain_page(l3p);
keir@17151 292 #else
keir@17158 293 mismatch |= (gw->l3e.l3 !=
keir@17158 294 v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)].l3);
keir@17151 295 #endif
keir@17151 296 l2p = sh_map_domain_page(gw->l2mfn);
keir@17158 297 mismatch |= (gw->l2e.l2 != l2p[guest_l2_table_offset(va)].l2);
keir@17158 298 sh_unmap_domain_page(l2p);
keir@17151 299 #else
keir@17151 300 l2p = (guest_l2e_t *)v->arch.paging.shadow.guest_vtable;
keir@17158 301 mismatch |= (gw->l2e.l2 != l2p[guest_l2_table_offset(va)].l2);
keir@17151 302 #endif
keir@17151 303 if ( !(guest_supports_superpages(v) &&
keir@17151 304 (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)) )
keir@17151 305 {
keir@17151 306 l1p = sh_map_domain_page(gw->l1mfn);
keir@17158 307 mismatch |= (gw->l1e.l1 != l1p[guest_l1_table_offset(va)].l1);
keir@17158 308 sh_unmap_domain_page(l1p);
keir@17151 309 }
keir@17151 310
keir@17158 311 return !mismatch;
keir@17151 312 }
keir@17151 313
keir@17151 314 /* Remove write access permissions from a gwalk_t in a batch, and
keir@17903 315 * return an OR-ed result indicating whether a TLB flush is needed
keir@17903 316 * and whether the guest pages must be re-walked.
keir@17903 317 *
keir@17903 318 * Syncing a page will remove write access to that page; but it may
keir@17903 319 * also give write access to other pages in the path. If we resync any
keir@17903 320 * pages, re-walk from the beginning.
keir@17151 321 */
keir@17903 322 #define GW_RMWR_FLUSHTLB 1
keir@17903 323 #define GW_RMWR_REWALK 2
keir@17903 324
keir@17151 325 static inline uint32_t
keir@17151 326 gw_remove_write_accesses(struct vcpu *v, unsigned long va, walk_t *gw)
keir@17151 327 {
keir@17903 328 uint32_t rc = 0;
keir@17151 329
keir@17151 330 #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
keir@17151 331 #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
keir@17903 332 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
keir@17903 333 if ( mfn_is_out_of_sync(gw->l3mfn) )
keir@17903 334 {
keir@17903 335 sh_resync(v, gw->l3mfn);
keir@17903 336 rc = GW_RMWR_REWALK;
keir@17903 337 }
keir@17903 338 else
keir@17903 339 #endif /* OOS */
keir@17903 340 if ( sh_remove_write_access(v, gw->l3mfn, 3, va) )
keir@17903 341 rc = GW_RMWR_FLUSHTLB;
keir@17903 342 #endif /* GUEST_PAGING_LEVELS >= 4 */
keir@17903 343
keir@17903 344 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
keir@17903 345 if ( mfn_is_out_of_sync(gw->l2mfn) )
keir@17903 346 {
keir@17903 347 sh_resync(v, gw->l2mfn);
keir@17903 348 rc |= GW_RMWR_REWALK;
keir@17903 349 }
keir@17903 350 else
keir@17903 351 #endif /* OOS */
keir@17903 352 if ( sh_remove_write_access(v, gw->l2mfn, 2, va) )
keir@17903 353 rc |= GW_RMWR_FLUSHTLB;
keir@17903 354 #endif /* GUEST_PAGING_LEVELS >= 3 */
keir@17903 355
keir@17151 356 if ( !(guest_supports_superpages(v) &&
keir@17903 357 (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE))
keir@17903 358 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
keir@17903 359 && !mfn_is_out_of_sync(gw->l1mfn)
keir@17903 360 #endif /* OOS */
keir@17903 361 && sh_remove_write_access(v, gw->l1mfn, 1, va) )
keir@17903 362 rc |= GW_RMWR_FLUSHTLB;
keir@17151 363
keir@17151 364 return rc;
keir@17151 365 }
keir@17151 366
Tim@16313 367 /* Walk the guest pagetables, after the manner of a hardware walker.
Tim@16313 368 *
Tim@16313 369 * Inputs: a vcpu, a virtual address, a walk_t to fill, a
keir@17151 370 * pointer to a pagefault code
Tim@16313 371 *
Tim@16313 372 * We walk the vcpu's guest pagetables, filling the walk_t with what we
Tim@16313 373 * see and adding any Accessed and Dirty bits that are needed in the
Tim@16313 374 * guest entries. Using the pagefault code, we check the permissions as
Tim@16313 375 * we go. For the purposes of reading pagetables we treat all non-RAM
Tim@16313 376 * memory as containing zeroes.
Tim@16313 377 *
keir@17151 378 * The walk is done in a lock-free style, with some sanity checks postponed
keir@17151 379 * until after the shadow lock is taken. Those delayed checks make sure that
keir@17151 380 * no inconsistent mapping is translated into the shadow page tables.
Tim@16313 381 *
keir@16626 382 * Returns 0 for success, or the set of permission bits that we failed on
keir@16626 383 * if the walk did not complete.
Tim@16313 384 * N.B. This is different from the old return code but almost no callers
Tim@16313 385 * checked the old return code anyway.
Tim@16313 386 */
keir@16321 387 static uint32_t
keir@17151 388 guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, uint32_t pfec)
kaf24@11310 389 {
Tim@15812 390 struct domain *d = v->domain;
Tim@15863 391 p2m_type_t p2mt;
keir@16321 392 guest_l1e_t *l1p = NULL;
keir@16321 393 guest_l2e_t *l2p = NULL;
Tim@16313 394 #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
keir@16321 395 guest_l3e_t *l3p = NULL;
keir@16321 396 guest_l4e_t *l4p;
Tim@16313 397 #endif
keir@16321 398 uint32_t gflags, mflags, rc = 0;
keir@16321 399 int pse;
Tim@16313 400
kfraser@14595 401 perfc_incr(shadow_guest_walk);
kaf24@11310 402 memset(gw, 0, sizeof(*gw));
kaf24@11310 403 gw->va = va;
kaf24@11310 404
keir@17151 405 gw->version = atomic_read(&d->arch.paging.shadow.gtable_dirty_version);
keir@17151 406 rmb();
keir@17151 407
keir@16321 408 /* Mandatory bits that must be set in every entry. We invert NX, to
keir@16321 409 * calculate as if there were an "X" bit that allowed access.
keir@16321 410 * We will accumulate, in rc, the set of flags that are missing. */
keir@16321 411 mflags = mandatory_flags(v, pfec);
keir@16321 412
kaf24@11310 413 #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
kaf24@11310 414 #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
keir@16321 415
Tim@16313 416 /* Get the l4e from the top level table and check its flags */
kaf24@11310 417 gw->l4mfn = pagetable_get_mfn(v->arch.guest_table);
keir@16321 418 l4p = ((guest_l4e_t *)v->arch.paging.shadow.guest_vtable);
keir@16321 419 gw->l4e = l4p[guest_l4_table_offset(va)];
keir@16321 420 gflags = guest_l4e_get_flags(gw->l4e) ^ _PAGE_NX_BIT;
keir@16321 421 rc |= ((gflags & mflags) ^ mflags);
keir@16321 422 if ( rc & _PAGE_PRESENT ) goto out;
Tim@16313 423
Tim@16313 424 /* Map the l3 table */
Tim@16313 425 gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(gw->l4e), &p2mt);
keir@16321 426 if ( !p2m_is_ram(p2mt) )
keir@16321 427 {
keir@16321 428 rc |= _PAGE_PRESENT;
keir@16321 429 goto out;
keir@16321 430 }
Tim@15863 431 ASSERT(mfn_valid(gw->l3mfn));
keir@17151 432
Tim@16313 433 /* Get the l3e and check its flags */
Tim@16313 434 l3p = sh_map_domain_page(gw->l3mfn);
keir@16321 435 gw->l3e = l3p[guest_l3_table_offset(va)];
keir@16321 436 gflags = guest_l3e_get_flags(gw->l3e) ^ _PAGE_NX_BIT;
keir@16321 437 rc |= ((gflags & mflags) ^ mflags);
keir@16321 438 if ( rc & _PAGE_PRESENT )
keir@16321 439 goto out;
Tim@16313 440
kaf24@11310 441 #else /* PAE only... */
Tim@16313 442
Tim@16313 443 /* Get the l3e from the cache of the top level table and check its flags */
Tim@16313 444 gw->l3e = v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)];
keir@16321 445 if ( !(guest_l3e_get_flags(gw->l3e) & _PAGE_PRESENT) )
keir@16321 446 {
keir@16321 447 rc |= _PAGE_PRESENT;
keir@16321 448 goto out;
keir@16321 449 }
Tim@16313 450
kaf24@11310 451 #endif /* PAE or 64... */
Tim@16313 452
Tim@16313 453 /* Map the l2 table */
Tim@16313 454 gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(gw->l3e), &p2mt);
keir@16321 455 if ( !p2m_is_ram(p2mt) )
keir@16321 456 {
keir@16321 457 rc |= _PAGE_PRESENT;
keir@16321 458 goto out;
keir@16321 459 }
Tim@15863 460 ASSERT(mfn_valid(gw->l2mfn));
keir@17151 461
Tim@16313 462 /* Get the l2e */
Tim@16313 463 l2p = sh_map_domain_page(gw->l2mfn);
keir@16321 464 gw->l2e = l2p[guest_l2_table_offset(va)];
Tim@16313 465
kaf24@11310 466 #else /* 32-bit only... */
Tim@16313 467
keir@16321 468 /* Get l2e from the top level table */
kaf24@11310 469 gw->l2mfn = pagetable_get_mfn(v->arch.guest_table);
keir@16321 470 l2p = ((guest_l2e_t *)v->arch.paging.shadow.guest_vtable);
keir@16321 471 gw->l2e = l2p[guest_l2_table_offset(va)];
Tim@16313 472
kaf24@11310 473 #endif /* All levels... */
Tim@16313 474
keir@16321 475 gflags = guest_l2e_get_flags(gw->l2e) ^ _PAGE_NX_BIT;
keir@16321 476 rc |= ((gflags & mflags) ^ mflags);
keir@16321 477 if ( rc & _PAGE_PRESENT )
keir@16321 478 goto out;
keir@16321 479
keir@16321 480 pse = (guest_supports_superpages(v) &&
keir@16321 481 (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE));
keir@16321 482
keir@16321 483 if ( pse )
kaf24@11310 484 {
kaf24@11310 485 /* Special case: this guest VA is in a PSE superpage, so there's
kaf24@11310 486 * no guest l1e. We make one up so that the propagation code
kaf24@11310 487 * can generate a shadow l1 table. Start with the gfn of the
kaf24@11310 488 * first 4k-page of the superpage. */
Tim@16313 489 gfn_t start = guest_l2e_get_gfn(gw->l2e);
kaf24@11310 490 /* Grant full access in the l1e, since all the guest entry's
Tim@16313 491 * access controls are enforced in the shadow l2e. */
kaf24@11310 492 int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
kaf24@11310 493 _PAGE_ACCESSED|_PAGE_DIRTY);
kaf24@11310 494 /* PSE level 2 entries use bit 12 for PAT; propagate it to bit 7
Tim@16313 495 * of the level 1. */
Tim@16313 496 if ( (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE_PAT) )
Tim@16313 497 flags |= _PAGE_PAT;
Tim@16313 498 /* Copy the cache-control bits to the l1 as well, because we
Tim@16313 499 * can't represent PAT in the (non-PSE) shadow l2e. :(
Tim@16313 500 * This could cause problems if a guest ever maps an area of
Tim@16313 501 * memory with superpages using more than one caching mode. */
Tim@16313 502 flags |= guest_l2e_get_flags(gw->l2e) & (_PAGE_PWT|_PAGE_PCD);
kaf24@11310 503 /* Increment the pfn by the right number of 4k pages.
kaf24@11310 504 * The ~0x1 is to mask out the PAT bit mentioned above. */
kaf24@11310 505 start = _gfn((gfn_x(start) & ~0x1) + guest_l1_table_offset(va));
Tim@16313 506 gw->l1e = guest_l1e_from_gfn(start, flags);
kaf24@11310 507 gw->l1mfn = _mfn(INVALID_MFN);
kaf24@11310 508 }
kaf24@11310 509 else
kaf24@11310 510 {
kaf24@11310 511 /* Not a superpage: carry on and find the l1e. */
Tim@16313 512 gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(gw->l2e), &p2mt);
keir@16321 513 if ( !p2m_is_ram(p2mt) )
keir@16321 514 {
keir@16321 515 rc |= _PAGE_PRESENT;
keir@16321 516 goto out;
keir@16321 517 }
Tim@15863 518 ASSERT(mfn_valid(gw->l1mfn));
Tim@16313 519 l1p = sh_map_domain_page(gw->l1mfn);
keir@16321 520 gw->l1e = l1p[guest_l1_table_offset(va)];
keir@16321 521 gflags = guest_l1e_get_flags(gw->l1e) ^ _PAGE_NX_BIT;
keir@16321 522 rc |= ((gflags & mflags) ^ mflags);
kaf24@11310 523 }
kaf24@11310 524
keir@16321 525 /* Go back and set accessed and dirty bits only if the walk was a
keir@16321 526 * success. Although the PRMs say higher-level _PAGE_ACCESSED bits
keir@16321 527 * get set whenever a lower-level PT is used, at least some hardware
keir@16321 528 * walkers behave this way (i.e. only set them on a successful walk). */
keir@16321 529 if ( rc == 0 )
keir@16321 530 {
keir@16321 531 #if GUEST_PAGING_LEVELS == 4 /* 64-bit only... */
keir@16321 532 if ( set_ad_bits(l4p + guest_l4_table_offset(va), &gw->l4e, 0) )
keir@16321 533 paging_mark_dirty(d, mfn_x(gw->l4mfn));
keir@16321 534 if ( set_ad_bits(l3p + guest_l3_table_offset(va), &gw->l3e, 0) )
keir@16321 535 paging_mark_dirty(d, mfn_x(gw->l3mfn));
keir@16321 536 #endif
keir@16321 537 if ( set_ad_bits(l2p + guest_l2_table_offset(va), &gw->l2e,
keir@16321 538 (pse && (pfec & PFEC_write_access))) )
keir@16321 539 paging_mark_dirty(d, mfn_x(gw->l2mfn));
keir@16321 540 if ( !pse )
keir@16321 541 {
keir@16321 542 if ( set_ad_bits(l1p + guest_l1_table_offset(va), &gw->l1e,
keir@16321 543 (pfec & PFEC_write_access)) )
keir@16321 544 paging_mark_dirty(d, mfn_x(gw->l1mfn));
keir@16321 545 }
keir@16321 546 }
keir@16321 547
keir@16321 548 out:
keir@16321 549 #if GUEST_PAGING_LEVELS == 4
keir@16321 550 if ( l3p ) sh_unmap_domain_page(l3p);
keir@16321 551 #endif
keir@16321 552 #if GUEST_PAGING_LEVELS >= 3
keir@16321 553 if ( l2p ) sh_unmap_domain_page(l2p);
keir@16321 554 #endif
keir@16321 555 if ( l1p ) sh_unmap_domain_page(l1p);
keir@16321 556
keir@16321 557 return rc;
kaf24@11310 558 }
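/* Editorial note: the following #if 0 block is a minimal, non-compiled sketch
 * (not part of the original file) illustrating the calling convention of
 * guest_walk_tables() described above: the return value is the set of
 * demanded _PAGE_* flags that the walk failed to satisfy. The helper name
 * example_va_is_writable() is hypothetical. */
#if 0
static int example_va_is_writable(struct vcpu *v, unsigned long va)
{
    walk_t gw;
    uint32_t missing = guest_walk_tables(v, va, &gw,
                                         PFEC_page_present | PFEC_write_access);
    if ( missing == 0 )
        return 1;                  /* every level was present and writable */
    if ( missing & _PAGE_PRESENT )
        return 0;                  /* some level of the walk was not present */
    return 0;                      /* present, but e.g. a level was read-only */
}
#endif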
kaf24@11310 559
kaf24@11310 560 /* Given a walk_t, translate the gw->va into the guest's notion of the
kaf24@11310 561 * corresponding frame number. */
kaf24@11310 562 static inline gfn_t
kaf24@11310 563 guest_walk_to_gfn(walk_t *gw)
kaf24@11310 564 {
Tim@16313 565 if ( !(guest_l1e_get_flags(gw->l1e) & _PAGE_PRESENT) )
kaf24@11310 566 return _gfn(INVALID_GFN);
Tim@16313 567 return guest_l1e_get_gfn(gw->l1e);
kaf24@11310 568 }
kaf24@11310 569
kaf24@11310 570 /* Given a walk_t, translate the gw->va into the guest's notion of the
kaf24@11310 571 * corresponding physical address. */
kaf24@11310 572 static inline paddr_t
kaf24@11310 573 guest_walk_to_gpa(walk_t *gw)
kaf24@11310 574 {
Tim@16313 575 if ( !(guest_l1e_get_flags(gw->l1e) & _PAGE_PRESENT) )
kaf24@11310 576 return 0;
Tim@16313 577 return guest_l1e_get_paddr(gw->l1e) + (gw->va & ~PAGE_MASK);
kaf24@11310 578 }
kaf24@11310 579
Tim@16313 580 #if 0 /* Keep for debugging */
kaf24@11310 581 /* Pretty-print the contents of a guest-walk */
kaf24@11310 582 static inline void print_gw(walk_t *gw)
kaf24@11310 583 {
kaf24@11310 584 SHADOW_PRINTK("GUEST WALK TO %#lx:\n", gw->va);
kaf24@11310 585 #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
kaf24@11310 586 #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
Tim@13909 587 SHADOW_PRINTK(" l4mfn=%" PRI_mfn "\n", mfn_x(gw->l4mfn));
Tim@16313 588 SHADOW_PRINTK(" l4e=%" SH_PRI_gpte "\n", gw->l4e.l4);
Tim@13915 589 SHADOW_PRINTK(" l3mfn=%" PRI_mfn "\n", mfn_x(gw->l3mfn));
kaf24@11310 590 #endif /* PAE or 64... */
Tim@16313 591 SHADOW_PRINTK(" l3e=%" SH_PRI_gpte "\n", gw->l3e.l3);
kaf24@11310 592 #endif /* All levels... */
Tim@13909 593 SHADOW_PRINTK(" l2mfn=%" PRI_mfn "\n", mfn_x(gw->l2mfn));
Tim@16313 594 SHADOW_PRINTK(" l2e=%" SH_PRI_gpte "\n", gw->l2e.l2);
Tim@13909 595 SHADOW_PRINTK(" l1mfn=%" PRI_mfn "\n", mfn_x(gw->l1mfn));
Tim@16313 596 SHADOW_PRINTK(" l1e=%" SH_PRI_gpte "\n", gw->l1e.l1);
kaf24@11310 597 }
Tim@16313 598 #endif /* 0 */
kaf24@11310 599
kaf24@11310 600 #if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES
kaf24@11310 601 /* Lightweight audit: pass all the shadows associated with this guest walk
kaf24@11310 602 * through the audit mechanisms */
kaf24@11310 603 static void sh_audit_gw(struct vcpu *v, walk_t *gw)
kaf24@11310 604 {
kaf24@11310 605 mfn_t smfn;
kaf24@11310 606
kaf24@11310 607 if ( !(SHADOW_AUDIT_ENABLE) )
kaf24@11310 608 return;
kaf24@11310 609
kaf24@11310 610 #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
Tim@12603 611 if ( mfn_valid(gw->l4mfn)
Tim@12603 612 && mfn_valid((smfn = get_shadow_status(v, gw->l4mfn,
Tim@12561 613 SH_type_l4_shadow))) )
kaf24@11310 614 (void) sh_audit_l4_table(v, smfn, _mfn(INVALID_MFN));
Tim@12603 615 if ( mfn_valid(gw->l3mfn)
Tim@12603 616 && mfn_valid((smfn = get_shadow_status(v, gw->l3mfn,
Tim@12561 617 SH_type_l3_shadow))) )
kaf24@11310 618 (void) sh_audit_l3_table(v, smfn, _mfn(INVALID_MFN));
Tim@11867 619 #endif /* PAE or 64... */
Tim@12603 620 if ( mfn_valid(gw->l2mfn) )
kaf24@11310 621 {
Tim@12603 622 if ( mfn_valid((smfn = get_shadow_status(v, gw->l2mfn,
Tim@12561 623 SH_type_l2_shadow))) )
kaf24@11310 624 (void) sh_audit_l2_table(v, smfn, _mfn(INVALID_MFN));
kaf24@11310 625 #if GUEST_PAGING_LEVELS == 3
Tim@12603 626 if ( mfn_valid((smfn = get_shadow_status(v, gw->l2mfn,
Tim@12561 627 SH_type_l2h_shadow))) )
kaf24@11310 628 (void) sh_audit_l2_table(v, smfn, _mfn(INVALID_MFN));
kaf24@11310 629 #endif
kaf24@11310 630 }
Tim@12603 631 if ( mfn_valid(gw->l1mfn)
Tim@12603 632 && mfn_valid((smfn = get_shadow_status(v, gw->l1mfn,
Tim@12561 633 SH_type_l1_shadow))) )
kaf24@11310 634 (void) sh_audit_l1_table(v, smfn, _mfn(INVALID_MFN));
Tim@16313 635 else if ( (guest_l2e_get_flags(gw->l2e) & _PAGE_PRESENT)
Tim@16313 636 && (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)
Tim@12603 637 && mfn_valid(
Tim@16313 638 (smfn = get_fl1_shadow_status(v, guest_l2e_get_gfn(gw->l2e)))) )
kaf24@11310 639 (void) sh_audit_fl1_table(v, smfn, _mfn(INVALID_MFN));
kaf24@11310 640 }
kaf24@11310 641
kaf24@11310 642 #else
kaf24@11310 643 #define sh_audit_gw(_v, _gw) do {} while(0)
kaf24@11310 644 #endif /* audit code */
kaf24@11310 645
kaf24@11310 646
keir@17620 647 #if (CONFIG_PAGING_LEVELS == GUEST_PAGING_LEVELS)
tim@11666 648 void *
tim@11666 649 sh_guest_map_l1e(struct vcpu *v, unsigned long addr,
tim@11666 650 unsigned long *gl1mfn)
tim@11666 651 {
tim@11666 652 void *pl1e = NULL;
tim@11666 653 walk_t gw;
tim@11666 654
tim@11666 655 ASSERT(shadow_mode_translate(v->domain));
tim@11666 656
tim@11666 657 // XXX -- this is expensive, but it's easy to cobble together...
tim@11666 658 // FIXME!
tim@11666 659
keir@17151 660 if ( guest_walk_tables(v, addr, &gw, PFEC_page_present) == 0
keir@16321 661 && mfn_valid(gw.l1mfn) )
tim@11666 662 {
tim@11666 663 if ( gl1mfn )
tim@11666 664 *gl1mfn = mfn_x(gw.l1mfn);
tim@11666 665 pl1e = map_domain_page(mfn_x(gw.l1mfn)) +
tim@11666 666 (guest_l1_table_offset(addr) * sizeof(guest_l1e_t));
tim@11666 667 }
tim@11666 668
tim@11666 669 return pl1e;
tim@11666 670 }
tim@11666 671
tim@11666 672 void
tim@11666 673 sh_guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e)
tim@11666 674 {
tim@11666 675 walk_t gw;
tim@11666 676
tim@11666 677 ASSERT(shadow_mode_translate(v->domain));
tim@11666 678
tim@11666 679 // XXX -- this is expensive, but it's easy to cobble together...
tim@11666 680 // FIXME!
tim@11666 681
keir@17151 682 (void) guest_walk_tables(v, addr, &gw, PFEC_page_present);
Tim@16313 683 *(guest_l1e_t *)eff_l1e = gw.l1e;
tim@11666 684 }
keir@17620 685 #endif /* CONFIG == GUEST (== SHADOW) */
tim@11666 686
kaf24@11310 687 /**************************************************************************/
kaf24@11310 688 /* Functions to compute the correct index into a shadow page, given an
kaf24@11310 689 * index into the guest page (as returned by guest_index()).
kaf24@11310 690 * This is trivial when the shadow and guest use the same sized PTEs, but
kaf24@11310 691 * gets more interesting when those sizes are mismatched (e.g. 32-bit guest,
kaf24@11310 692 * PAE- or 64-bit shadows).
kaf24@11310 693 *
kaf24@11310 694 * These functions also increment the shadow mfn, when necessary. When PTE
kaf24@11310 695 * sizes are mismatched, it takes 2 shadow L1 pages for a single guest L1
kaf24@11310 696 * page. In this case, we allocate 2 contiguous pages for the shadow L1, and
kaf24@11310 697 * use simple pointer arithmetic on a pointer to the guest L1e to figure out
kaf24@11310 698 * which shadow page we really want. Similarly, when PTE sizes are
kaf24@11310 699 * mismatched, we shadow a guest L2 page with 4 shadow L2 pages. (The easiest
kaf24@11310 700 * way to see this is: a 32-bit guest L2 page maps 4GB of virtual address
kaf24@11310 701 * space, while a PAE- or 64-bit shadow L2 page maps 1GB of virtual address
kaf24@11310 702 * space.)
kaf24@11310 703 *
kaf24@11310 704 * For PAE guests, for every 32 bytes of guest L3 page table, we use 64 bytes
kaf24@11310 705 * of shadow (to store both the shadow, and the info that would normally be
kaf24@11310 706 * stored in page_info fields). This arrangement allows the shadow and the
kaf24@11310 707 * "page_info" fields to always be stored in the same page (in fact, in
kaf24@11310 708 * the same cache line), avoiding an extra call to map_domain_page().
kaf24@11310 709 */
kaf24@11310 710
kaf24@11310 711 static inline u32
kaf24@11310 712 guest_index(void *ptr)
kaf24@11310 713 {
kaf24@11310 714 return (u32)((unsigned long)ptr & ~PAGE_MASK) / sizeof(guest_l1e_t);
kaf24@11310 715 }
kaf24@11310 716
kfraser@12700 717 static u32
kaf24@11310 718 shadow_l1_index(mfn_t *smfn, u32 guest_index)
kaf24@11310 719 {
keir@17620 720 #if (GUEST_PAGING_LEVELS == 2)
kaf24@11310 721 *smfn = _mfn(mfn_x(*smfn) +
kaf24@11310 722 (guest_index / SHADOW_L1_PAGETABLE_ENTRIES));
kaf24@11310 723 return (guest_index % SHADOW_L1_PAGETABLE_ENTRIES);
kaf24@11310 724 #else
kaf24@11310 725 return guest_index;
kaf24@11310 726 #endif
kaf24@11310 727 }
kaf24@11310 728
kfraser@12700 729 static u32
kaf24@11310 730 shadow_l2_index(mfn_t *smfn, u32 guest_index)
kaf24@11310 731 {
keir@17620 732 #if (GUEST_PAGING_LEVELS == 2)
kaf24@11310 733 // Because we use 2 shadow l2 entries for each guest entry, the number of
kaf24@11310 734 // guest entries per shadow page is SHADOW_L2_PAGETABLE_ENTRIES/2
kaf24@11310 735 //
kaf24@11310 736 *smfn = _mfn(mfn_x(*smfn) +
kaf24@11310 737 (guest_index / (SHADOW_L2_PAGETABLE_ENTRIES / 2)));
kaf24@11310 738
keir@17620 739 // We multiply by two to get the index of the first of the two entries
kaf24@11310 740 // used to shadow the specified guest entry.
kaf24@11310 741 return (guest_index % (SHADOW_L2_PAGETABLE_ENTRIES / 2)) * 2;
kaf24@11310 742 #else
kaf24@11310 743 return guest_index;
kaf24@11310 744 #endif
kaf24@11310 745 }
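/* Editorial note: the following #if 0 block is a non-compiled worked example
 * (not part of the original file) of the index arithmetic above for a 32-bit
 * guest shadowed with PAE/64-bit entries (GUEST_PAGING_LEVELS == 2). The
 * helper name and the starting mfn are hypothetical. */
#if 0
static u32 example_shadow_l2_index(mfn_t first_smfn)
{
    /* A 32-bit guest l2 has 1024 entries; each shadow l2 page holds 512
     * shadow entries, i.e. covers 256 guest entries (two sl2es per guest
     * entry), so one guest l2 is shadowed by 4 consecutive shadow pages. */
    mfn_t smfn = first_smfn;
    u32 sidx = shadow_l2_index(&smfn, 769 /* guest l2 index 0x301 */);
    /* smfn has been advanced by 769 / 256 == 3 pages, and
     * sidx == (769 % 256) * 2 == 2: the first sl2e of the pair. */
    return sidx;
}
#endif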
kaf24@11310 746
Tim@11867 747 #if GUEST_PAGING_LEVELS >= 4
kaf24@11310 748
kfraser@12700 749 static u32
kaf24@11310 750 shadow_l3_index(mfn_t *smfn, u32 guest_index)
kaf24@11310 751 {
kaf24@11310 752 return guest_index;
kaf24@11310 753 }
kaf24@11310 754
kfraser@12700 755 static u32
kaf24@11310 756 shadow_l4_index(mfn_t *smfn, u32 guest_index)
kaf24@11310 757 {
kaf24@11310 758 return guest_index;
kaf24@11310 759 }
kaf24@11310 760
kaf24@11310 761 #endif // GUEST_PAGING_LEVELS >= 4
kaf24@11310 762
keir@17620 763
kaf24@11310 764 /**************************************************************************/
Tim@12069 765 /* Function which computes shadow entries from their corresponding guest
Tim@12069 766 * entries. This is the "heart" of the shadow code. It operates using
Tim@12069 767 * level-1 shadow types, but handles all levels of entry.
Tim@12069 768 * Don't call it directly, but use the four wrappers below.
kaf24@11310 769 */
Tim@12069 770
Tim@12069 771 static always_inline void
Tim@12069 772 _sh_propagate(struct vcpu *v,
Tim@16313 773 guest_intpte_t guest_intpte,
Tim@12069 774 mfn_t target_mfn,
Tim@12069 775 void *shadow_entry_ptr,
Tim@12069 776 int level,
Tim@12069 777 fetch_type_t ft,
Tim@15863 778 p2m_type_t p2mt)
kaf24@11310 779 {
Tim@16313 780 guest_l1e_t guest_entry = { guest_intpte };
Tim@12069 781 shadow_l1e_t *sp = shadow_entry_ptr;
kaf24@11310 782 struct domain *d = v->domain;
Tim@16313 783 gfn_t target_gfn = guest_l1e_get_gfn(guest_entry);
kaf24@11310 784 u32 pass_thru_flags;
Tim@12069 785 u32 gflags, sflags;
kaf24@11310 786
Tim@11867 787 /* We don't shadow PAE l3s */
Tim@11867 788 ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
Tim@11867 789
Tim@15863 790 /* Check there's something for the shadows to map to */
Tim@15863 791 if ( !p2m_is_valid(p2mt) )
Tim@15863 792 {
Tim@15863 793 *sp = shadow_l1e_empty();
Tim@15863 794 goto done;
Tim@15863 795 }
Tim@15863 796
Tim@16313 797 gflags = guest_l1e_get_flags(guest_entry);
Tim@12069 798
kaf24@11310 799 if ( unlikely(!(gflags & _PAGE_PRESENT)) )
Tim@12069 800 {
Tim@12069 801 /* If a guest l1 entry is not present, shadow with the magic
Tim@12069 802 * guest-not-present entry. */
Tim@12069 803 if ( level == 1 )
Tim@12069 804 *sp = sh_l1e_gnp();
Tim@12069 805 else
Tim@12069 806 *sp = shadow_l1e_empty();
Tim@12069 807 goto done;
Tim@12069 808 }
Tim@12069 809
Tim@15863 810 if ( level == 1 && p2mt == p2m_mmio_dm )
Tim@12069 811 {
Tim@15863 812 /* Guest l1e maps emulated MMIO space */
Tim@16313 813 *sp = sh_l1e_mmio(target_gfn, gflags);
Tim@14368 814 if ( !d->arch.paging.shadow.has_fast_mmio_entries )
Tim@14368 815 d->arch.paging.shadow.has_fast_mmio_entries = 1;
Tim@12069 816 goto done;
Tim@12069 817 }
Tim@12069 818
keir@16345 819 // Must have a valid target_mfn unless this is a prefetch or an l1
keir@16345 820 // pointing at MMIO space. In the case of a prefetch, an invalid
keir@16345 821 // mfn means that we can not usefully shadow anything, and so we
keir@16345 822 // return early.
kaf24@11310 823 //
keir@16345 824 if ( !mfn_valid(target_mfn)
keir@16345 825 && !(level == 1 && (!shadow_mode_refcounts(d)
keir@16345 826 || p2mt == p2m_mmio_direct)) )
kaf24@11310 827 {
Tim@12069 828 ASSERT((ft == ft_prefetch));
Tim@12069 829 *sp = shadow_l1e_empty();
Tim@12069 830 goto done;
kaf24@11310 831 }
kaf24@11310 832
kaf24@11310 833 // Propagate bits from the guest to the shadow.
kaf24@11310 834 // Some of these may be overwritten, below.
kaf24@11310 835 // Since we know the guest's PRESENT bit is set, we also set the shadow's
kaf24@11310 836 // SHADOW_PRESENT bit.
kaf24@11310 837 //
keir@17571 838 pass_thru_flags = (_PAGE_ACCESSED | _PAGE_USER |
Tim@11867 839 _PAGE_RW | _PAGE_PRESENT);
Tim@11867 840 if ( guest_supports_nx(v) )
Tim@11867 841 pass_thru_flags |= _PAGE_NX_BIT;
keir@16310 842 if ( !shadow_mode_refcounts(d) && !mfn_valid(target_mfn) )
keir@16310 843 pass_thru_flags |= _PAGE_PAT | _PAGE_PCD | _PAGE_PWT;
Tim@12069 844 sflags = gflags & pass_thru_flags;
kaf24@11310 845
Tim@16313 846 /*
Tim@16313 847 * For HVM domains with direct access to MMIO areas, set the correct
keir@16354 848 * caching attributes in the shadows to match what was asked for.
Tim@16313 849 */
keir@17974 850 if ( (level == 1) && is_hvm_domain(d) && has_arch_pdevs(d) &&
keir@16354 851 !is_xen_heap_mfn(mfn_x(target_mfn)) )
keir@16197 852 {
keir@16197 853 unsigned int type;
Tim@16313 854 if ( hvm_get_mem_pinned_cacheattr(d, gfn_x(target_gfn), &type) )
keir@16197 855 sflags |= pat_type_2_pte_flags(type);
Tim@16313 856 else if ( d->arch.hvm_domain.is_in_uc_mode )
keir@16186 857 sflags |= pat_type_2_pte_flags(PAT_TYPE_UNCACHABLE);
keir@16186 858 else
keir@16186 859 sflags |= get_pat_flags(v,
keir@16186 860 gflags,
Tim@16313 861 gfn_to_paddr(target_gfn),
keir@17123 862 ((paddr_t)mfn_x(target_mfn)) << PAGE_SHIFT);
keir@16186 863 }
keir@16186 864
kaf24@11310 865 // Set the A&D bits for higher level shadows.
kaf24@11310 866 // Higher level entries do not, strictly speaking, have dirty bits, but
kaf24@11310 867 // since we use shadow linear tables, each of these entries may, at some
kaf24@11310 868 // point in time, also serve as a shadow L1 entry.
tim@11666 869 // By setting both the A&D bits in each of these, we eliminate the burden
kaf24@11310 870 // on the hardware to update these bits on initial accesses.
kaf24@11310 871 //
kaf24@11310 872 if ( (level > 1) && !((SHADOW_PAGING_LEVELS == 3) && (level == 3)) )
kaf24@11310 873 sflags |= _PAGE_ACCESSED | _PAGE_DIRTY;
kaf24@11310 874
kaf24@11310 875 // If the A or D bit has not yet been set in the guest, then we must
kaf24@11310 876 // prevent the corresponding kind of access.
kaf24@11310 877 //
Tim@11867 878 if ( unlikely(!(gflags & _PAGE_ACCESSED)) )
kaf24@11310 879 sflags &= ~_PAGE_PRESENT;
kaf24@11310 880
tim@11666 881 /* D bits exist in L1es and PSE L2es */
tim@11666 882 if ( unlikely(((level == 1) ||
tim@11666 883 ((level == 2) &&
tim@11666 884 (gflags & _PAGE_PSE) &&
tim@11666 885 guest_supports_superpages(v)))
tim@11666 886 && !(gflags & _PAGE_DIRTY)) )
kaf24@11310 887 sflags &= ~_PAGE_RW;
kaf24@11310 888
Tim@12069 889 // shadow_mode_log_dirty support
kaf24@11310 890 //
Tim@12069 891 // Only allow the guest write access to a page a) on a demand fault,
Tim@12069 892 // or b) if the page is already marked as dirty.
Tim@12069 893 //
Tim@15863 894 // (We handle log-dirty entirely inside the shadow code, without using the
Tim@15863 895 // p2m_ram_logdirty p2m type: only HAP uses that.)
Tim@12069 896 if ( unlikely((level == 1) && shadow_mode_log_dirty(d)) )
kaf24@11310 897 {
keir@16310 898 if ( mfn_valid(target_mfn) ) {
keir@16310 899 if ( ft & FETCH_TYPE_WRITE )
keir@16310 900 paging_mark_dirty(d, mfn_x(target_mfn));
keir@16310 901 else if ( !sh_mfn_is_dirty(d, target_mfn) )
keir@16310 902 sflags &= ~_PAGE_RW;
keir@16310 903 }
Tim@12069 904 }
Tim@15863 905
keir@17616 906 if ( unlikely((level == 1) && d->dirty_vram
keir@17616 907 && d->dirty_vram->last_dirty == -1
keir@17616 908 && gfn_x(target_gfn) >= d->dirty_vram->begin_pfn
keir@17616 909 && gfn_x(target_gfn) < d->dirty_vram->end_pfn) )
keir@17616 910 {
keir@17616 911 if ( ft & FETCH_TYPE_WRITE )
keir@17616 912 d->dirty_vram->last_dirty = NOW();
keir@17616 913 else
keir@17616 914 sflags &= ~_PAGE_RW;
keir@17616 915 }
keir@17616 916
Tim@15863 917 /* Read-only memory */
Tim@15863 918 if ( p2mt == p2m_ram_ro )
Tim@15863 919 sflags &= ~_PAGE_RW;
Tim@12069 920
Tim@12069 921 // protect guest page tables
Tim@12069 922 //
keir@17903 923 if ( unlikely((level == 1)
keir@17903 924 && sh_mfn_is_a_page_table(target_mfn)
keir@17903 925 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC )
keir@17984 926 /* Unless the page is out of sync and the guest is
keir@17984 927 writing to it. */
keir@17984 928 && !(mfn_oos_may_write(target_mfn)
keir@17984 929 && (ft == ft_demand_write))
keir@17903 930 #endif /* OOS */
keir@17903 931 ) )
Tim@12069 932 {
Tim@12069 933 if ( shadow_mode_trap_reads(d) )
kaf24@11310 934 {
Tim@12069 935 // if we are trapping both reads & writes, then mark this page
Tim@12069 936 // as not present...
Tim@12069 937 //
Tim@12069 938 sflags &= ~_PAGE_PRESENT;
kaf24@11310 939 }
Tim@12069 940 else
kaf24@11310 941 {
Tim@12069 942 // otherwise, just prevent any writes...
Tim@12069 943 //
Tim@12069 944 sflags &= ~_PAGE_RW;
kaf24@11310 945 }
kaf24@11310 946 }
kaf24@11310 947
tim@11666 948 // PV guests in 64-bit mode use two different page tables for user vs
tim@11666 949 // supervisor permissions, making the guest's _PAGE_USER bit irrelevant.
tim@11666 950 // It is always shadowed as present...
kfraser@14974 951 if ( (GUEST_PAGING_LEVELS == 4) && !is_pv_32on64_domain(d)
Tim@14962 952 && !is_hvm_domain(d) )
tim@11666 953 {
tim@11666 954 sflags |= _PAGE_USER;
tim@11666 955 }
tim@11666 956
Tim@12069 957 *sp = shadow_l1e_from_mfn(target_mfn, sflags);
Tim@15863 958
Tim@12069 959 done:
Tim@12069 960 SHADOW_DEBUG(PROPAGATE,
Tim@12069 961 "%s level %u guest %" SH_PRI_gpte " shadow %" SH_PRI_pte "\n",
Tim@16313 962 fetch_type_names[ft], level, guest_entry.l1, sp->l1);
kaf24@11310 963 }
kaf24@11310 964
Tim@12069 965
Tim@16313 966 /* These four wrappers give us a little bit of type-safety back around
Tim@16313 967 * the use of void-* pointers and intpte types in _sh_propagate(), and
Tim@16313 968 * allow the compiler to optimize out some level checks. */
Tim@12069 969
kaf24@11310 970 #if GUEST_PAGING_LEVELS >= 4
kaf24@11310 971 static void
kaf24@11310 972 l4e_propagate_from_guest(struct vcpu *v,
Tim@16313 973 guest_l4e_t gl4e,
kaf24@11310 974 mfn_t sl3mfn,
Tim@12069 975 shadow_l4e_t *sl4e,
kaf24@11310 976 fetch_type_t ft)
kaf24@11310 977 {
Tim@16313 978 _sh_propagate(v, gl4e.l4, sl3mfn, sl4e, 4, ft, p2m_ram_rw);
kaf24@11310 979 }
Tim@11867 980
kaf24@11310 981 static void
kaf24@11310 982 l3e_propagate_from_guest(struct vcpu *v,
Tim@16313 983 guest_l3e_t gl3e,
kaf24@11310 984 mfn_t sl2mfn,
Tim@12069 985 shadow_l3e_t *sl3e,
kaf24@11310 986 fetch_type_t ft)
kaf24@11310 987 {
Tim@16313 988 _sh_propagate(v, gl3e.l3, sl2mfn, sl3e, 3, ft, p2m_ram_rw);
kaf24@11310 989 }
Tim@11867 990 #endif // GUEST_PAGING_LEVELS >= 4
kaf24@11310 991
kaf24@11310 992 static void
kaf24@11310 993 l2e_propagate_from_guest(struct vcpu *v,
Tim@16313 994 guest_l2e_t gl2e,
Tim@12069 995 mfn_t sl1mfn,
Tim@12069 996 shadow_l2e_t *sl2e,
kaf24@11310 997 fetch_type_t ft)
kaf24@11310 998 {
Tim@16313 999 _sh_propagate(v, gl2e.l2, sl1mfn, sl2e, 2, ft, p2m_ram_rw);
kaf24@11310 1000 }
kaf24@11310 1001
Tim@12069 1002 static void
Tim@12069 1003 l1e_propagate_from_guest(struct vcpu *v,
Tim@16313 1004 guest_l1e_t gl1e,
Tim@12069 1005 mfn_t gmfn,
Tim@12069 1006 shadow_l1e_t *sl1e,
Tim@12069 1007 fetch_type_t ft,
Tim@15863 1008 p2m_type_t p2mt)
kaf24@11310 1009 {
Tim@16313 1010 _sh_propagate(v, gl1e.l1, gmfn, sl1e, 1, ft, p2mt);
kaf24@11310 1011 }
kaf24@11310 1012
kaf24@11310 1013
kaf24@11310 1014 /**************************************************************************/
kaf24@11310 1015 /* These functions update shadow entries (and do bookkeeping on the shadow
kaf24@11310 1016 * tables they are in). It is intended that they are the only
kaf24@11310 1017 * functions which ever write (non-zero) data onto a shadow page.
kaf24@11310 1018 */
kaf24@11310 1019
kaf24@11310 1020 static inline void safe_write_entry(void *dst, void *src)
kaf24@11310 1021 /* Copy one PTE safely when processors might be running on the
kaf24@11310 1022 * destination pagetable. This does *not* give safety against
kaf24@11310 1023 * concurrent writes (that's what the shadow lock is for), just
kaf24@11310 1024 * stops the hardware picking up partially written entries. */
kaf24@11310 1025 {
kaf24@11310 1026 volatile unsigned long *d = dst;
kaf24@11310 1027 unsigned long *s = src;
kaf24@11310 1028 ASSERT(!((unsigned long) d & (sizeof (shadow_l1e_t) - 1)));
kaf24@11310 1029 #if CONFIG_PAGING_LEVELS == 3
kaf24@11310 1030 /* In PAE mode, pagetable entries are larger
kaf24@11310 1031 * than machine words, so won't get written atomically. We need to make
kaf24@11310 1032 * sure any other cpu running on these shadows doesn't see a
kaf24@11310 1033 * half-written entry. Do this by marking the entry not-present first,
kaf24@11310 1034 * then writing the high word before the low word. */
kaf24@11310 1035 BUILD_BUG_ON(sizeof (shadow_l1e_t) != 2 * sizeof (unsigned long));
kaf24@11310 1036 d[0] = 0;
kaf24@11310 1037 d[1] = s[1];
kaf24@11310 1038 d[0] = s[0];
kaf24@11310 1039 #else
keir@17620 1040 /* In 64-bit, sizeof(pte) == sizeof(ulong) == 1 word,
kaf24@11310 1041 * which will be an atomic write, since the entry is aligned. */
kaf24@11310 1042 BUILD_BUG_ON(sizeof (shadow_l1e_t) != sizeof (unsigned long));
kaf24@11310 1043 *d = *s;
kaf24@11310 1044 #endif
kaf24@11310 1045 }
kaf24@11310 1046
kaf24@11310 1047
kaf24@11310 1048 static inline void
kaf24@11310 1049 shadow_write_entries(void *d, void *s, int entries, mfn_t mfn)
kaf24@11310 1050 /* This function does the actual writes to shadow pages.
kaf24@11310 1051 * It must not be called directly, since it doesn't do the bookkeeping
kaf24@11310 1052 * that shadow_set_l*e() functions do. */
kaf24@11310 1053 {
kaf24@11310 1054 shadow_l1e_t *dst = d;
kaf24@11310 1055 shadow_l1e_t *src = s;
kaf24@11310 1056 void *map = NULL;
kaf24@11310 1057 int i;
kaf24@11310 1058
kaf24@11310 1059 /* Because we mirror access rights at all levels in the shadow, an
kaf24@11310 1060 * l2 (or higher) entry with the RW bit cleared will leave us with
kaf24@11310 1061 * no write access through the linear map.
kaf24@11310 1062 * We detect that by writing to the shadow with copy_to_user() and
kaf24@11310 1063 * using map_domain_page() to get a writeable mapping if we need to. */
kaf24@11310 1064 if ( __copy_to_user(d, d, sizeof (unsigned long)) != 0 )
kaf24@11310 1065 {
kfraser@14595 1066 perfc_incr(shadow_linear_map_failed);
kaf24@11310 1067 map = sh_map_domain_page(mfn);
kaf24@11310 1068 ASSERT(map != NULL);
kaf24@11310 1069 dst = map + ((unsigned long)dst & (PAGE_SIZE - 1));
kaf24@11310 1070 }
kaf24@11310 1071
kaf24@11310 1072
kaf24@11310 1073 for ( i = 0; i < entries; i++ )
kaf24@11310 1074 safe_write_entry(dst++, src++);
kaf24@11310 1075
kaf24@11310 1076 if ( map != NULL ) sh_unmap_domain_page(map);
kaf24@11310 1077 }
kaf24@11310 1078
kaf24@11310 1079 static inline int
kaf24@11310 1080 perms_strictly_increased(u32 old_flags, u32 new_flags)
kaf24@11310 1081 /* Given the flags of two entries, are the new flags a strict
kaf24@11310 1082 * increase in rights over the old ones? */
kaf24@11310 1083 {
kaf24@11310 1084 u32 of = old_flags & (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX);
kaf24@11310 1085 u32 nf = new_flags & (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX);
kaf24@11310 1086 /* Flip the NX bit, since it's the only one that decreases rights;
kaf24@11310 1087 * we calculate as if it were an "X" bit. */
kaf24@11310 1088 of ^= _PAGE_NX_BIT;
kaf24@11310 1089 nf ^= _PAGE_NX_BIT;
kaf24@11310 1090 /* If the changed bits are all set in the new flags, then rights strictly
kaf24@11310 1091 * increased between old and new. */
kaf24@11310 1092 return ((of | (of ^ nf)) == nf);
kaf24@11310 1093 }
kaf24@11310 1094
kaf24@11310 1095 static int inline
kaf24@11310 1096 shadow_get_page_from_l1e(shadow_l1e_t sl1e, struct domain *d)
kaf24@11310 1097 {
kaf24@11310 1098 int res;
kaf24@11310 1099 mfn_t mfn;
kaf24@11310 1100 struct domain *owner;
Tim@12069 1101
Tim@12069 1102 ASSERT(!sh_l1e_is_magic(sl1e));
kaf24@11310 1103
kaf24@11310 1104 if ( !shadow_mode_refcounts(d) )
kaf24@11310 1105 return 1;
kaf24@11310 1106
Tim@12069 1107 res = get_page_from_l1e(sl1e, d);
kaf24@11310 1108
kaf24@11310 1109 // If a privileged domain is attempting to install a map of a page it does
kaf24@11310 1110 // not own, we let it succeed anyway.
kaf24@11310 1111 //
kaf24@11310 1112 if ( unlikely(!res) &&
kaf24@11310 1113 !shadow_mode_translate(d) &&
Tim@12603 1114 mfn_valid(mfn = shadow_l1e_get_mfn(sl1e)) &&
kaf24@11310 1115 (owner = page_get_owner(mfn_to_page(mfn))) &&
keir@16856 1116 (d != owner) &&
keir@16856 1117 IS_PRIV_FOR(d, owner))
kaf24@11310 1118 {
Tim@12069 1119 res = get_page_from_l1e(sl1e, owner);
kaf24@11310 1120 SHADOW_PRINTK("privileged domain %d installs map of mfn %05lx "
kaf24@11310 1121 "which is owned by domain %d: %s\n",
kaf24@11310 1122 d->domain_id, mfn_x(mfn), owner->domain_id,
kaf24@11310 1123 res ? "success" : "failed");
kaf24@11310 1124 }
kaf24@11310 1125
kaf24@11310 1126 if ( unlikely(!res) )
kaf24@11310 1127 {
kfraser@14595 1128 perfc_incr(shadow_get_page_fail);
kaf24@11310 1129 SHADOW_PRINTK("failed: l1e=" SH_PRI_pte "\n", sl1e.l1);
kaf24@11310 1130 }
kaf24@11310 1131
kaf24@11310 1132 return res;
kaf24@11310 1133 }
kaf24@11310 1134
kaf24@11310 1135 static void inline
kaf24@11310 1136 shadow_put_page_from_l1e(shadow_l1e_t sl1e, struct domain *d)
kaf24@11310 1137 {
kaf24@11310 1138 if ( !shadow_mode_refcounts(d) )
kaf24@11310 1139 return;
kaf24@11310 1140
kaf24@11310 1141 put_page_from_l1e(sl1e, d);
kaf24@11310 1142 }
kaf24@11310 1143
kaf24@11310 1144 #if GUEST_PAGING_LEVELS >= 4
kaf24@11310 1145 static int shadow_set_l4e(struct vcpu *v,
kaf24@11310 1146 shadow_l4e_t *sl4e,
kaf24@11310 1147 shadow_l4e_t new_sl4e,
kaf24@11310 1148 mfn_t sl4mfn)
kaf24@11310 1149 {
Tim@12564 1150 int flags = 0, ok;
kaf24@11310 1151 shadow_l4e_t old_sl4e;
kaf24@11310 1152 paddr_t paddr;
kaf24@11310 1153 ASSERT(sl4e != NULL);
kaf24@11310 1154 old_sl4e = *sl4e;
kaf24@11310 1155
kaf24@11310 1156 if ( old_sl4e.l4 == new_sl4e.l4 ) return 0; /* Nothing to do */
kaf24@11310 1157
kaf24@11310 1158 paddr = ((((paddr_t)mfn_x(sl4mfn)) << PAGE_SHIFT)
kaf24@11310 1159 | (((unsigned long)sl4e) & ~PAGE_MASK));
kaf24@11310 1160
kaf24@11310 1161 if ( shadow_l4e_get_flags(new_sl4e) & _PAGE_PRESENT )
Tim@12564 1162 {
kaf24@11310 1163 /* About to install a new reference */
Tim@12564 1164 mfn_t sl3mfn = shadow_l4e_get_mfn(new_sl4e);
Tim@12564 1165 ok = sh_get_ref(v, sl3mfn, paddr);
Tim@12564 1166 /* Are we pinning l3 shadows to handle weird linux behaviour? */
Tim@12564 1167 if ( sh_type_is_pinnable(v, SH_type_l3_64_shadow) )
Tim@12564 1168 ok |= sh_pin(v, sl3mfn);
Tim@12564 1169 if ( !ok )
Tim@12563 1170 {
Tim@12563 1171 domain_crash(v->domain);
Tim@12563 1172 return SHADOW_SET_ERROR;
Tim@12563 1173 }
keir@17903 1174 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC )
keir@17903 1175 shadow_resync_all(v, 0);
keir@17903 1176 #endif
Tim@12564 1177 }
kaf24@11310 1178
kaf24@11310 1179 /* Write the new entry */
kaf24@11310 1180 shadow_write_entries(sl4e, &new_sl4e, 1, sl4mfn);
kaf24@11310 1181 flags |= SHADOW_SET_CHANGED;
kaf24@11310 1182
kaf24@11310 1183 if ( shadow_l4e_get_flags(old_sl4e) & _PAGE_PRESENT )
kaf24@11310 1184 {
kaf24@11310 1185 /* We lost a reference to an old mfn. */
kaf24@11310 1186 mfn_t osl3mfn = shadow_l4e_get_mfn(old_sl4e);
kaf24@11310 1187 if ( (mfn_x(osl3mfn) != mfn_x(shadow_l4e_get_mfn(new_sl4e)))
kaf24@11310 1188 || !perms_strictly_increased(shadow_l4e_get_flags(old_sl4e),
kaf24@11310 1189 shadow_l4e_get_flags(new_sl4e)) )
kaf24@11310 1190 {
kaf24@11310 1191 flags |= SHADOW_SET_FLUSH;
kaf24@11310 1192 }
kaf24@11310 1193 sh_put_ref(v, osl3mfn, paddr);
kaf24@11310 1194 }
kaf24@11310 1195 return flags;
kaf24@11310 1196 }
Tim@11867 1197
kaf24@11310 1198 static int shadow_set_l3e(struct vcpu *v,
kaf24@11310 1199 shadow_l3e_t *sl3e,
kaf24@11310 1200 shadow_l3e_t new_sl3e,
kaf24@11310 1201 mfn_t sl3mfn)
kaf24@11310 1202 {
kaf24@11310 1203 int flags = 0;
kaf24@11310 1204 shadow_l3e_t old_sl3e;
kaf24@11310 1205 paddr_t paddr;
kaf24@11310 1206 ASSERT(sl3e != NULL);
kaf24@11310 1207 old_sl3e = *sl3e;
kaf24@11310 1208
kaf24@11310 1209 if ( old_sl3e.l3 == new_sl3e.l3 ) return 0; /* Nothing to do */
kaf24@11310 1210
kaf24@11310 1211 paddr = ((((paddr_t)mfn_x(sl3mfn)) << PAGE_SHIFT)
kaf24@11310 1212 | (((unsigned long)sl3e) & ~PAGE_MASK));
kaf24@11310 1213
Tim@12563 1214 if ( shadow_l3e_get_flags(new_sl3e) & _PAGE_PRESENT )
keir@17903 1215 {
kaf24@11310 1216 /* About to install a new reference */
Tim@12564 1217 if ( !sh_get_ref(v, shadow_l3e_get_mfn(new_sl3e), paddr) )
Tim@12563 1218 {
Tim@12563 1219 domain_crash(v->domain);
Tim@12563 1220 return SHADOW_SET_ERROR;
keir@17903 1221 }
keir@17903 1222 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC )
keir@17903 1223 shadow_resync_all(v, 0);
keir@17903 1224 #endif
keir@17903 1225 }
kaf24@11310 1226
kaf24@11310 1227 /* Write the new entry */
kaf24@11310 1228 shadow_write_entries(sl3e, &new_sl3e, 1, sl3mfn);
kaf24@11310 1229 flags |= SHADOW_SET_CHANGED;
kaf24@11310 1230
kaf24@11310 1231 if ( shadow_l3e_get_flags(old_sl3e) & _PAGE_PRESENT )
kaf24@11310 1232 {
kaf24@11310 1233 /* We lost a reference to an old mfn. */
kaf24@11310 1234 mfn_t osl2mfn = shadow_l3e_get_mfn(old_sl3e);
kaf24@11310 1235 if ( (mfn_x(osl2mfn) != mfn_x(shadow_l3e_get_mfn(new_sl3e))) ||
kaf24@11310 1236 !perms_strictly_increased(shadow_l3e_get_flags(old_sl3e),
kaf24@11310 1237 shadow_l3e_get_flags(new_sl3e)) )
kaf24@11310 1238 {
kaf24@11310 1239 flags |= SHADOW_SET_FLUSH;
kaf24@11310 1240 }
kaf24@11310 1241 sh_put_ref(v, osl2mfn, paddr);
kaf24@11310 1242 }
kaf24@11310 1243 return flags;
kaf24@11310 1244 }
Tim@11867 1245 #endif /* GUEST_PAGING_LEVELS >= 4 */
kaf24@11310 1246
kaf24@11310 1247 static int shadow_set_l2e(struct vcpu *v,
kaf24@11310 1248 shadow_l2e_t *sl2e,
kaf24@11310 1249 shadow_l2e_t new_sl2e,
kaf24@11310 1250 mfn_t sl2mfn)
kaf24@11310 1251 {
kaf24@11310 1252 int flags = 0;
kaf24@11310 1253 shadow_l2e_t old_sl2e;
kaf24@11310 1254 paddr_t paddr;
kaf24@11310 1255
keir@17620 1256 #if GUEST_PAGING_LEVELS == 2
kaf24@11310 1257 /* In 2-on-3 we work with pairs of l2es pointing at two-page
kaf24@11310 1258 * shadows. Reference counting and up-pointers track from the first
kaf24@11310 1259 * page of the shadow to the first l2e, so make sure that we're
kaf24@11310 1260 * working with those:
kaf24@11310 1261 * Align the pointer down so it's pointing at the first of the pair */
kaf24@11310 1262 sl2e = (shadow_l2e_t *)((unsigned long)sl2e & ~(sizeof(shadow_l2e_t)));
kaf24@11310 1263 /* Align the mfn of the shadow entry too */
kaf24@11310 1264 new_sl2e.l2 &= ~(1<<PAGE_SHIFT);
kaf24@11310 1265 #endif
kaf24@11310 1266
kaf24@11310 1267 ASSERT(sl2e != NULL);
kaf24@11310 1268 old_sl2e = *sl2e;
kaf24@11310 1269
kaf24@11310 1270 if ( old_sl2e.l2 == new_sl2e.l2 ) return 0; /* Nothing to do */
kaf24@11310 1271
kaf24@11310 1272 paddr = ((((paddr_t)mfn_x(sl2mfn)) << PAGE_SHIFT)
kaf24@11310 1273 | (((unsigned long)sl2e) & ~PAGE_MASK));
kaf24@11310 1274
kaf24@11310 1275 if ( shadow_l2e_get_flags(new_sl2e) & _PAGE_PRESENT )
keir@17903 1276 {
keir@17903 1277 mfn_t sl1mfn = shadow_l2e_get_mfn(new_sl2e);
keir@17903 1278
kaf24@11310 1279 /* About to install a new reference */
keir@17903 1280 if ( !sh_get_ref(v, sl1mfn, paddr) )
Tim@12563 1281 {
Tim@12563 1282 domain_crash(v->domain);
Tim@12563 1283 return SHADOW_SET_ERROR;
keir@17903 1284 }
keir@17903 1285 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
keir@17903 1286 {
keir@17903 1287 struct shadow_page_info *sp = mfn_to_shadow_page(sl1mfn);
keir@17903 1288 mfn_t gl1mfn = _mfn(sp->backpointer);
keir@17903 1289
keir@17903 1290 /* If the shadow is a fl1 then the backpointer contains
keir@17903 1291 the GFN instead of the GMFN, and it's definitely not
keir@17903 1292 OOS. */
keir@17903 1293 if ( (sp->type != SH_type_fl1_shadow) && mfn_valid(gl1mfn)
keir@17903 1294 && mfn_is_out_of_sync(gl1mfn) )
keir@17903 1295 sh_resync(v, gl1mfn);
keir@17903 1296 }
keir@17903 1297 #endif
keir@17903 1298 }
kaf24@11310 1299
kaf24@11310 1300 /* Write the new entry */
keir@17620 1301 #if GUEST_PAGING_LEVELS == 2
kaf24@11310 1302 {
kaf24@11310 1303 shadow_l2e_t pair[2] = { new_sl2e, new_sl2e };
kaf24@11310 1304         /* The l1 shadow is two pages long and needs to be pointed to by
kaf24@11310 1305          * two adjacent l2es.  The pair have the same flags, but point
kaf24@11310 1306          * at odd and even MFNs */
kaf24@11310 1307 ASSERT(!(pair[0].l2 & (1<<PAGE_SHIFT)));
kaf24@11310 1308 pair[1].l2 |= (1<<PAGE_SHIFT);
kaf24@11310 1309 shadow_write_entries(sl2e, &pair, 2, sl2mfn);
kaf24@11310 1310 }
kaf24@11310 1311 #else /* normal case */
kaf24@11310 1312 shadow_write_entries(sl2e, &new_sl2e, 1, sl2mfn);
kaf24@11310 1313 #endif
kaf24@11310 1314 flags |= SHADOW_SET_CHANGED;
kaf24@11310 1315
kaf24@11310 1316 if ( shadow_l2e_get_flags(old_sl2e) & _PAGE_PRESENT )
kaf24@11310 1317 {
kaf24@11310 1318 /* We lost a reference to an old mfn. */
kaf24@11310 1319 mfn_t osl1mfn = shadow_l2e_get_mfn(old_sl2e);
kaf24@11310 1320 if ( (mfn_x(osl1mfn) != mfn_x(shadow_l2e_get_mfn(new_sl2e))) ||
kaf24@11310 1321 !perms_strictly_increased(shadow_l2e_get_flags(old_sl2e),
kaf24@11310 1322 shadow_l2e_get_flags(new_sl2e)) )
kaf24@11310 1323 {
kaf24@11310 1324 flags |= SHADOW_SET_FLUSH;
kaf24@11310 1325 }
kaf24@11310 1326 sh_put_ref(v, osl1mfn, paddr);
kaf24@11310 1327 }
kaf24@11310 1328 return flags;
kaf24@11310 1329 }
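
/* An illustrative sketch (invented name, not called anywhere): every
 * shadow_set_lNe() builds the same "paddr" cookie before calling
 * sh_get_ref()/sh_put_ref() -- the machine address of the shadow entry
 * itself, i.e. the shadow table's MFN shifted up by PAGE_SHIFT with the
 * entry's byte offset within the page OR'd in, so the refcounting code
 * can record which entry holds the reference. */
static inline paddr_t sl_entry_paddr_example(mfn_t table_mfn, void *entry)
{
    return (((paddr_t)mfn_x(table_mfn)) << PAGE_SHIFT)
         | (((unsigned long)entry) & ~PAGE_MASK);
}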
kaf24@11310 1330
keir@17571 1331 static inline void shadow_vram_get_l1e(shadow_l1e_t new_sl1e,
keir@17571 1332 shadow_l1e_t *sl1e,
keir@17571 1333 mfn_t sl1mfn,
keir@17571 1334 struct domain *d)
keir@17571 1335 {
keir@17571 1336 mfn_t mfn;
keir@17571 1337 unsigned long gfn;
keir@17571 1338
keir@17571 1339 if ( !d->dirty_vram ) return;
keir@17571 1340
keir@17571 1341 mfn = shadow_l1e_get_mfn(new_sl1e);
keir@17621 1342
keir@17621 1343 if ( !mfn_valid(mfn) ) return; /* m2p for mmio_direct may not exist */
keir@17621 1344
keir@17571 1345 gfn = mfn_to_gfn(d, mfn);
keir@17571 1346
keir@17571 1347 if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) ) {
keir@17571 1348 unsigned long i = gfn - d->dirty_vram->begin_pfn;
keir@17571 1349 struct page_info *page = mfn_to_page(mfn);
keir@17571 1350 u32 count_info = page->u.inuse.type_info & PGT_count_mask;
keir@17571 1351
keir@17571 1352 if ( count_info == 1 )
keir@17571 1353 /* Initial guest reference, record it */
keir@17571 1354 d->dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn))
keir@17573 1355 | ((unsigned long)sl1e & ~PAGE_MASK);
keir@17571 1356 }
keir@17571 1357 }
keir@17571 1358
keir@17571 1359 static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e,
keir@17571 1360 shadow_l1e_t *sl1e,
keir@17571 1361 mfn_t sl1mfn,
keir@17571 1362 struct domain *d)
keir@17571 1363 {
keir@17571 1364 mfn_t mfn;
keir@17571 1365 unsigned long gfn;
keir@17571 1366
keir@17571 1367 if ( !d->dirty_vram ) return;
keir@17571 1368
keir@17571 1369 mfn = shadow_l1e_get_mfn(old_sl1e);
keir@17621 1370
keir@17621 1371 if ( !mfn_valid(mfn) ) return;
keir@17621 1372
keir@17571 1373 gfn = mfn_to_gfn(d, mfn);
keir@17571 1374
keir@17571 1375 if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) ) {
keir@17571 1376 unsigned long i = gfn - d->dirty_vram->begin_pfn;
keir@17571 1377 struct page_info *page = mfn_to_page(mfn);
keir@17571 1378 u32 count_info = page->u.inuse.type_info & PGT_count_mask;
keir@17571 1379 int dirty = 0;
keir@17573 1380 paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn))
keir@17573 1381 | ((unsigned long)sl1e & ~PAGE_MASK);
keir@17571 1382
keir@17571 1383 if ( count_info == 1 ) {
keir@17571 1384 /* Last reference */
keir@17571 1385 if ( d->dirty_vram->sl1ma[i] == INVALID_PADDR ) {
keir@17571 1386                 /* We never recorded which sl1e mapped it, so assume it's dirty */
keir@17571 1387 dirty = 1;
keir@17571 1388 } else {
keir@17571 1389 ASSERT(d->dirty_vram->sl1ma[i] == sl1ma);
keir@17571 1390 d->dirty_vram->sl1ma[i] = INVALID_PADDR;
keir@17571 1391 if ( shadow_l1e_get_flags(old_sl1e) & _PAGE_DIRTY )
keir@17571 1392 dirty = 1;
keir@17571 1393 }
keir@17571 1394 } else {
keir@17571 1395 /* We had more than one reference, just consider the page dirty. */
keir@17571 1396 dirty = 1;
keir@17571 1397             /* Check whether it is the one we recorded. */
keir@17571 1398 if ( d->dirty_vram->sl1ma[i] == sl1ma ) {
keir@17571 1399                 /* Too bad, the one we recorded is the one going away; forget it. */
keir@17571 1400 d->dirty_vram->sl1ma[i] = INVALID_PADDR;
keir@17571 1401 } else {
keir@17571 1402 /* Ok, our recorded sl1e is still pointing to this page, let's
keir@17571 1403 * just hope it will remain. */
keir@17571 1404 }
keir@17571 1405 }
keir@17616 1406 if ( dirty ) {
keir@17575 1407 d->dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
keir@17616 1408 d->dirty_vram->last_dirty = NOW();
keir@17616 1409 }
keir@17571 1410 }
keir@17571 1411 }
keir@17571 1412
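/* An illustrative sketch (invented name, not called by the code above),
 * assuming a byte-addressed bitmap as used by shadow_vram_put_l1e(): the
 * dirty-VRAM map keeps one bit per pfn in [begin_pfn, end_pfn), so pfn
 * number i within the range lands in byte i/8 at bit i%8. */
static inline void vram_mark_dirty_example(uint8_t *dirty_bitmap,
                                           unsigned long i)
{
    dirty_bitmap[i / 8] |= 1 << (i % 8);
}
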
kaf24@11310 1413 static int shadow_set_l1e(struct vcpu *v,
kaf24@11310 1414 shadow_l1e_t *sl1e,
kaf24@11310 1415 shadow_l1e_t new_sl1e,
kaf24@11310 1416 mfn_t sl1mfn)
kaf24@11310 1417 {
kaf24@11310 1418 int flags = 0;
kaf24@11310 1419 struct domain *d = v->domain;
kaf24@11310 1420 shadow_l1e_t old_sl1e;
keir@17904 1421 #if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
keir@17904 1422 mfn_t new_gmfn = shadow_l1e_get_mfn(new_sl1e);
keir@17904 1423 #endif
kaf24@11310 1424 ASSERT(sl1e != NULL);
keir@18331 1425
keir@18331 1426 #if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
keir@18331 1427 if ( mfn_valid(new_gmfn) && mfn_oos_may_write(new_gmfn)
keir@18331 1428 && ((shadow_l1e_get_flags(new_sl1e) & (_PAGE_RW|_PAGE_PRESENT))
keir@18331 1429 == (_PAGE_RW|_PAGE_PRESENT)) )
keir@18331 1430 oos_fixup_add(v, new_gmfn, sl1mfn, pgentry_ptr_to_slot(sl1e));
keir@18331 1431 #endif
kaf24@11310 1432
kaf24@11310 1433 old_sl1e = *sl1e;
kaf24@11310 1434
kaf24@11310 1435 if ( old_sl1e.l1 == new_sl1e.l1 ) return 0; /* Nothing to do */
kaf24@11310 1436
Tim@12069 1437 if ( (shadow_l1e_get_flags(new_sl1e) & _PAGE_PRESENT)
Tim@12069 1438 && !sh_l1e_is_magic(new_sl1e) )
kaf24@11310 1439 {
kaf24@11310 1440 /* About to install a new reference */
kaf24@11310 1441 if ( shadow_mode_refcounts(d) ) {
keir@18454 1442 TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_GET_REF);
kaf24@11310 1443 if ( shadow_get_page_from_l1e(new_sl1e, d) == 0 )
kaf24@11310 1444 {
kaf24@11310 1445 /* Doesn't look like a pagetable. */
kaf24@11310 1446 flags |= SHADOW_SET_ERROR;
kaf24@11310 1447 new_sl1e = shadow_l1e_empty();
keir@17904 1448 }
keir@17904 1449 else
keir@17904 1450 {
keir@17571 1451 shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
kaf24@11310 1452 }
kaf24@11310 1453 }
kaf24@11310 1454 }
kaf24@11310 1455
kaf24@11310 1456 /* Write the new entry */
kaf24@11310 1457 shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
kaf24@11310 1458 flags |= SHADOW_SET_CHANGED;
kaf24@11310 1459
Tim@12069 1460 if ( (shadow_l1e_get_flags(old_sl1e) & _PAGE_PRESENT)
Tim@12069 1461 && !sh_l1e_is_magic(old_sl1e) )
kaf24@11310 1462 {
kaf24@11310 1463 /* We lost a reference to an old mfn. */
kaf24@11310 1464 /* N.B. Unlike higher-level sets, never need an extra flush
kaf24@11310 1465 * when writing an l1e. Because it points to the same guest frame
kaf24@11310 1466 * as the guest l1e did, it's the guest's responsibility to
kaf24@11310 1467 * trigger a flush later. */
kaf24@11310 1468 if ( shadow_mode_refcounts(d) )
kaf24@11310 1469 {
keir@17571 1470 shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d);
kaf24@11310 1471 shadow_put_page_from_l1e(old_sl1e, d);
keir@18454 1472 TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_PUT_REF);
kaf24@11310 1473 }
kaf24@11310 1474 }
kaf24@11310 1475 return flags;
kaf24@11310 1476 }
kaf24@11310 1477
kaf24@11310 1478
kaf24@11310 1479 /**************************************************************************/
kaf24@11310 1480 /* Macros to walk pagetables. These take the shadow of a pagetable and
kaf24@11310 1481 * walk every "interesting" entry. That is, they don't touch Xen mappings,
kaf24@11310 1482 * and for 32-bit l2s shadowed onto PAE or 64-bit, they only touch every
kaf24@11310 1483 * second entry (since pairs of entries are managed together). For multi-page
kaf24@11310 1484 * shadows they walk all pages.
kaf24@11310 1485 *
kaf24@11310 1486 * Arguments are an MFN, the variable to point to each entry, a variable
kaf24@11310 1487 * to indicate that we are done (we will shortcut to the end of the scan
kaf24@11310 1488  * when _done != 0), the domain (passed to the l2/l4 walkers so they can
kaf24@11310 1489  * decide whether to avoid Xen mappings), and the code.
kaf24@11310 1490 *
kaf24@11310 1491 * WARNING: These macros have side-effects. They change the values of both
kaf24@11310 1492 * the pointer and the MFN. */
kaf24@11310 1493
kaf24@11310 1494 static inline void increment_ptr_to_guest_entry(void *ptr)
kaf24@11310 1495 {
kaf24@11310 1496 if ( ptr )
kaf24@11310 1497 {
kaf24@11310 1498 guest_l1e_t **entry = ptr;
kaf24@11310 1499 (*entry)++;
kaf24@11310 1500 }
kaf24@11310 1501 }
kaf24@11310 1502
kaf24@11310 1503 /* All kinds of l1: touch all entries */
Tim@12561 1504 #define _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code) \
kaf24@11310 1505 do { \
kaf24@11310 1506 int _i; \
keir@17620 1507 shadow_l1e_t *_sp = sh_map_domain_page((_sl1mfn)); \
Tim@12561 1508 ASSERT(mfn_to_shadow_page(_sl1mfn)->type == SH_type_l1_shadow \
Tim@12561 1509 || mfn_to_shadow_page(_sl1mfn)->type == SH_type_fl1_shadow); \
kaf24@11310 1510 for ( _i = 0; _i < SHADOW_L1_PAGETABLE_ENTRIES; _i++ ) \
kaf24@11310 1511 { \
kaf24@11310 1512 (_sl1e) = _sp + _i; \
kaf24@11310 1513 if ( shadow_l1e_get_flags(*(_sl1e)) & _PAGE_PRESENT ) \
kaf24@11310 1514 {_code} \
kaf24@11310 1515 if ( _done ) break; \
kaf24@11310 1516 increment_ptr_to_guest_entry(_gl1p); \
kaf24@11310 1517 } \
keir@17620 1518 sh_unmap_domain_page(_sp); \
kaf24@11310 1519 } while (0)
kaf24@11310 1520
kaf24@11310 1521 /* 32-bit l1, on PAE or 64-bit shadows: need to walk both pages of shadow */
kaf24@11310 1522 #if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
Tim@12561 1523 #define SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code) \
kaf24@11310 1524 do { \
kaf24@11310 1525 int __done = 0; \
Tim@12561 1526 _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, \
kaf24@11310 1527 ({ (__done = _done); }), _code); \
kaf24@11310 1528 _sl1mfn = _mfn(mfn_x(_sl1mfn) + 1); \
kaf24@11310 1529 if ( !__done ) \
Tim@12561 1530 _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, \
kaf24@11310 1531 ({ (__done = _done); }), _code); \
kaf24@11310 1532 } while (0)
kaf24@11310 1533 #else /* Everything else; l1 shadows are only one page */
Tim@12561 1534 #define SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code) \
kaf24@11310 1535 _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code)
kaf24@11310 1536 #endif
kaf24@11310 1537
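/* An illustrative sketch of how the walker above is used (invented name,
 * not called anywhere; the real users appear in the destructors and
 * unhook functions later in this file).  Passing 0 for _gl1p means no
 * guest-entry cursor; passing 0 for _done means walk the whole shadow. */
static inline int sh_count_present_l1es_example(mfn_t sl1mfn)
{
    shadow_l1e_t *sl1e;
    int count = 0;
    /* sl1mfn must be an l1 or fl1 shadow, per the ASSERT in the walker */
    SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, { count++; });
    return count;
}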
kaf24@11310 1538
keir@17620 1539 #if GUEST_PAGING_LEVELS == 2
kaf24@11310 1540
kaf24@11310 1541 /* 32-bit l2 on PAE/64: four pages, touch every second entry, and avoid Xen */
ack@14013 1542 #define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _dom, _code) \
kaf24@11310 1543 do { \
kaf24@11310 1544 int _i, _j, __done = 0; \
ack@14013 1545 int _xen = !shadow_mode_external(_dom); \
Tim@12561 1546 ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_32_shadow); \
kaf24@11310 1547 for ( _j = 0; _j < 4 && !__done; _j++ ) \
kaf24@11310 1548 { \
keir@17620 1549 shadow_l2e_t *_sp = sh_map_domain_page(_sl2mfn); \
kaf24@11310 1550 for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i += 2 ) \
kaf24@11310 1551 if ( (!(_xen)) \
kaf24@11310 1552 || ((_j * SHADOW_L2_PAGETABLE_ENTRIES) + _i) \
kaf24@11310 1553 < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT) ) \
kaf24@11310 1554 { \
kaf24@11310 1555 (_sl2e) = _sp + _i; \
kaf24@11310 1556 if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT ) \
kaf24@11310 1557 {_code} \
kaf24@11310 1558 if ( (__done = (_done)) ) break; \
kaf24@11310 1559 increment_ptr_to_guest_entry(_gl2p); \
kaf24@11310 1560 } \
keir@17620 1561 sh_unmap_domain_page(_sp); \
kaf24@11310 1562 _sl2mfn = _mfn(mfn_x(_sl2mfn) + 1); \
kaf24@11310 1563 } \
kaf24@11310 1564 } while (0)
kaf24@11310 1565
kaf24@11310 1566 #elif GUEST_PAGING_LEVELS == 3
kaf24@11310 1567
kaf24@11310 1568 /* PAE: if it's an l2h, don't touch Xen mappings */
ack@14013 1569 #define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _dom, _code) \
kaf24@11310 1570 do { \
kaf24@11310 1571 int _i; \
ack@14013 1572 int _xen = !shadow_mode_external(_dom); \
keir@17620 1573 shadow_l2e_t *_sp = sh_map_domain_page((_sl2mfn)); \
Tim@12561 1574 ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_pae_shadow \
Tim@12561 1575 || mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2h_pae_shadow);\
kaf24@11310 1576 for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ ) \
kaf24@11310 1577 if ( (!(_xen)) \
Tim@12561 1578 || mfn_to_shadow_page(_sl2mfn)->type != SH_type_l2h_pae_shadow\
kaf24@11310 1579 || ((_i + (3 * SHADOW_L2_PAGETABLE_ENTRIES)) \
kaf24@11310 1580 < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT)) ) \
kaf24@11310 1581 { \
kaf24@11310 1582 (_sl2e) = _sp + _i; \
kaf24@11310 1583 if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT ) \
kaf24@11310 1584 {_code} \
kaf24@11310 1585 if ( _done ) break; \
kaf24@11310 1586 increment_ptr_to_guest_entry(_gl2p); \
kaf24@11310 1587 } \
keir@17620 1588 sh_unmap_domain_page(_sp); \
kaf24@11310 1589 } while (0)
kaf24@11310 1590
kaf24@11310 1591 #else
kaf24@11310 1592
ack@14013 1593 /* 64-bit l2: touch all entries except a 32-on-64 guest's Xen slots in l2h */
ack@14013 1594 #define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _dom, _code) \
ack@14013 1595 do { \
ack@14013 1596 int _i; \
ack@14013 1597 int _xen = !shadow_mode_external(_dom); \
keir@17620 1598 shadow_l2e_t *_sp = sh_map_domain_page((_sl2mfn)); \
ack@14013 1599 ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_64_shadow || \
ack@14013 1600 mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2h_64_shadow); \
ack@14013 1601 for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ ) \
ack@14013 1602 { \
ack@14013 1603 if ( (!(_xen)) \
kfraser@14974 1604 || !is_pv_32on64_domain(_dom) \
ack@14013 1605 || mfn_to_shadow_page(_sl2mfn)->type != SH_type_l2h_64_shadow \
ack@14013 1606 || (_i < COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(_dom)) ) \
ack@14013 1607 { \
ack@14013 1608 (_sl2e) = _sp + _i; \
ack@14013 1609 if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT ) \
ack@14013 1610 {_code} \
ack@14013 1611 if ( _done ) break; \
ack@14013 1612 increment_ptr_to_guest_entry(_gl2p); \
ack@14013 1613 } \
ack@14013 1614 } \
keir@17620 1615 sh_unmap_domain_page(_sp); \
kaf24@11310 1616 } while (0)
kaf24@11310 1617
kaf24@11310 1618 #endif /* different kinds of l2 */
kaf24@11310 1619
Tim@11867 1620 #if GUEST_PAGING_LEVELS == 4
kaf24@11310 1621
kaf24@11310 1622 /* 64-bit l3: touch all entries */
Tim@12561 1623 #define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code) \
kaf24@11310 1624 do { \
kaf24@11310 1625 int _i; \
keir@17620 1626 shadow_l3e_t *_sp = sh_map_domain_page((_sl3mfn)); \
Tim@12561 1627 ASSERT(mfn_to_shadow_page(_sl3mfn)->type == SH_type_l3_64_shadow); \
kaf24@11310 1628 for ( _i = 0; _i < SHADOW_L3_PAGETABLE_ENTRIES; _i++ ) \
kaf24@11310 1629 { \
kaf24@11310 1630 (_sl3e) = _sp + _i; \
kaf24@11310 1631 if ( shadow_l3e_get_flags(*(_sl3e)) & _PAGE_PRESENT ) \
kaf24@11310 1632 {_code} \
kaf24@11310 1633 if ( _done ) break; \
kaf24@11310 1634 increment_ptr_to_guest_entry(_gl3p); \
kaf24@11310 1635 } \
keir@17620 1636 sh_unmap_domain_page(_sp); \
kaf24@11310 1637 } while (0)
kaf24@11310 1638
kaf24@11310 1639 /* 64-bit l4: avoid Xen mappings */
ack@14013 1640 #define SHADOW_FOREACH_L4E(_sl4mfn, _sl4e, _gl4p, _done, _dom, _code) \
kaf24@11310 1641 do { \
keir@17620 1642 shadow_l4e_t *_sp = sh_map_domain_page((_sl4mfn)); \
ack@14013 1643 int _xen = !shadow_mode_external(_dom); \
kaf24@11310 1644 int _i; \
Tim@12561 1645 ASSERT(mfn_to_shadow_page(_sl4mfn)->type == SH_type_l4_64_shadow); \
kaf24@11310 1646 for ( _i = 0; _i < SHADOW_L4_PAGETABLE_ENTRIES; _i++ ) \
kaf24@11310 1647 { \
ack@14013 1648 if ( (!(_xen)) || is_guest_l4_slot(_dom, _i) ) \
kaf24@11310 1649 { \
kaf24@11310 1650 (_sl4e) = _sp + _i; \
kaf24@11310 1651 if ( shadow_l4e_get_flags(*(_sl4e)) & _PAGE_PRESENT ) \
kaf24@11310 1652 {_code} \
kaf24@11310 1653 if ( _done ) break; \
kaf24@11310 1654 } \
kaf24@11310 1655 increment_ptr_to_guest_entry(_gl4p); \
kaf24@11310 1656 } \
keir@17620 1657 sh_unmap_domain_page(_sp); \
kaf24@11310 1658 } while (0)
kaf24@11310 1659
kaf24@11310 1660 #endif
kaf24@11310 1661
kaf24@11310 1662
kaf24@11310 1663
kaf24@11310 1664 /**************************************************************************/
kaf24@11310 1665 /* Functions to install Xen mappings and linear mappings in shadow pages */
kaf24@11310 1666
kaf24@11310 1667 // XXX -- this function should probably be moved to shadow-common.c, but that
kaf24@11310 1668 // probably wants to wait until the shadow types have been moved from
kaf24@11310 1669 // shadow-types.h to shadow-private.h
kaf24@11310 1670 //
kaf24@11310 1671 #if CONFIG_PAGING_LEVELS == 4 && GUEST_PAGING_LEVELS == 4
kaf24@11310 1672 void sh_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn)
kaf24@11310 1673 {
kaf24@11310 1674 struct domain *d = v->domain;
kaf24@11310 1675 shadow_l4e_t *sl4e;
kaf24@11310 1676
kaf24@11310 1677 sl4e = sh_map_domain_page(sl4mfn);
kaf24@11310 1678 ASSERT(sl4e != NULL);
kaf24@11310 1679 ASSERT(sizeof (l4_pgentry_t) == sizeof (shadow_l4e_t));
kaf24@11310 1680
kaf24@11310 1681 /* Copy the common Xen mappings from the idle domain */
kaf24@11310 1682 memcpy(&sl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
kaf24@11310 1683 &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
kaf24@11310 1684 ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
kaf24@11310 1685
kaf24@11310 1686 /* Install the per-domain mappings for this domain */
kaf24@11310 1687 sl4e[shadow_l4_table_offset(PERDOMAIN_VIRT_START)] =
kaf24@11310 1688 shadow_l4e_from_mfn(page_to_mfn(virt_to_page(d->arch.mm_perdomain_l3)),
kaf24@11310 1689 __PAGE_HYPERVISOR);
kaf24@11310 1690
keir@17025 1691 /* Shadow linear mapping for 4-level shadows. N.B. for 3-level
keir@17025 1692 * shadows on 64-bit xen, this linear mapping is later replaced by the
keir@17025 1693 * monitor pagetable structure, which is built in make_monitor_table
keir@17025 1694 * and maintained by sh_update_linear_entries. */
kaf24@11310 1695 sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
kaf24@11310 1696 shadow_l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR);
kaf24@11310 1697
keir@17025 1698 /* Self linear mapping. */
tim@11666 1699 if ( shadow_mode_translate(v->domain) && !shadow_mode_external(v->domain) )
tim@11666 1700 {
tim@11666 1701 // linear tables may not be used with translated PV guests
tim@11666 1702 sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
tim@11666 1703 shadow_l4e_empty();
tim@11666 1704 }
tim@11666 1705 else
tim@11666 1706 {
tim@11666 1707 sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
tim@11666 1708 shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR);
tim@11666 1709 }
tim@11666 1710
kaf24@11310 1711 if ( shadow_mode_translate(v->domain) )
kaf24@11310 1712 {
kaf24@11310 1713 /* install domain-specific P2M table */
kaf24@11310 1714 sl4e[shadow_l4_table_offset(RO_MPT_VIRT_START)] =
kaf24@11310 1715 shadow_l4e_from_mfn(pagetable_get_mfn(d->arch.phys_table),
kaf24@11310 1716 __PAGE_HYPERVISOR);
kaf24@11310 1717 }
kaf24@11310 1718
kaf24@11310 1719 sh_unmap_domain_page(sl4e);
kaf24@11310 1720 }
kaf24@11310 1721 #endif
kaf24@11310 1722
ack@14013 1723 #if CONFIG_PAGING_LEVELS >= 3 && GUEST_PAGING_LEVELS >= 3
kaf24@11310 1724 // For 3-on-3 PV guests, we need to make sure the xen mappings are in
kaf24@11310 1725 // place, which means that we need to populate the l2h entry in the l3
kaf24@11310 1726 // table.
kaf24@11310 1727
ack@14013 1728 static void sh_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn)
kaf24@11310 1729 {
kaf24@11310 1730 struct domain *d = v->domain;
kaf24@11310 1731 shadow_l2e_t *sl2e;
ack@13311 1732 #if CONFIG_PAGING_LEVELS == 3
kaf24@11310 1733 int i;
ack@13311 1734 #else
ack@13311 1735
kfraser@14974 1736 if ( !is_pv_32on64_vcpu(v) )
ack@13311 1737 return;
ack@13311 1738 #endif
kaf24@11310 1739
kaf24@11310 1740 sl2e = sh_map_domain_page(sl2hmfn);
kaf24@11310 1741 ASSERT(sl2e != NULL);
kaf24@11310 1742 ASSERT(sizeof (l2_pgentry_t) == sizeof (shadow_l2e_t));
kaf24@11310 1743
ack@13311 1744 #if CONFIG_PAGING_LEVELS == 3
ack@13311 1745
kaf24@11310 1746 /* Copy the common Xen mappings from the idle domain */
kaf24@11310 1747 memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
kaf24@11310 1748 &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
kaf24@11310 1749 L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
kaf24@11310 1750
kaf24@11310 1751 /* Install the per-domain mappings for this domain */
kaf24@11310 1752 for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
kaf24@11310 1753 sl2e[shadow_l2_table_offset(PERDOMAIN_VIRT_START) + i] =
kaf24@11310 1754 shadow_l2e_from_mfn(
kaf24@11310 1755 page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i),
kaf24@11310 1756 __PAGE_HYPERVISOR);
kaf24@11310 1757
kaf24@11310 1758 /* We don't set up a linear mapping here because we can't until this
kaf24@11310 1759 * l2h is installed in an l3e. sh_update_linear_entries() handles
Tim@11867 1760 * the linear mappings when CR3 (and so the fourth l3e) is loaded.
Tim@11867 1761 * We zero them here, just as a safety measure.
tim@11666 1762 */
tim@11666 1763 for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
tim@11666 1764 sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START) + i] =
tim@11666 1765 shadow_l2e_empty();
tim@11666 1766 for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
tim@11666 1767 sl2e[shadow_l2_table_offset(SH_LINEAR_PT_VIRT_START) + i] =
tim@11666 1768 shadow_l2e_empty();
kaf24@11310 1769
kaf24@11310 1770 if ( shadow_mode_translate(d) )
kaf24@11310 1771 {
kaf24@11310 1772 /* Install the domain-specific p2m table */
kaf24@11310 1773 l3_pgentry_t *p2m;
kaf24@11310 1774 ASSERT(pagetable_get_pfn(d->arch.phys_table) != 0);
kaf24@11310 1775 p2m = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
kaf24@11310 1776 for ( i = 0; i < MACHPHYS_MBYTES>>1; i++ )
kaf24@11310 1777 {
kaf24@11310 1778 sl2e[shadow_l2_table_offset(RO_MPT_VIRT_START) + i] =
tim@11420 1779 (l3e_get_flags(p2m[i]) & _PAGE_PRESENT)
tim@11420 1780 ? shadow_l2e_from_mfn(_mfn(l3e_get_pfn(p2m[i])),
tim@11420 1781 __PAGE_HYPERVISOR)
tim@11420 1782 : shadow_l2e_empty();
kaf24@11310 1783 }
kaf24@11310 1784 sh_unmap_domain_page(p2m);
kaf24@11310 1785 }
ack@13311 1786
ack@13311 1787 #else
ack@13311 1788
ack@13311 1789 /* Copy the common Xen mappings from the idle domain */
ack@14013 1790 memcpy(
ack@14013 1791 &sl2e[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)],
ack@14013 1792 &compat_idle_pg_table_l2[l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
ack@14013 1793 COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*sl2e));
ack@13311 1794
ack@13311 1795 #endif
kaf24@11310 1796
kaf24@11310 1797 sh_unmap_domain_page(sl2e);
kaf24@11310 1798 }
kaf24@11310 1799 #endif
kaf24@11310 1800
kaf24@11310 1801
kaf24@11310 1802
kaf24@11310 1803
kaf24@11310 1804
kaf24@11310 1805 /**************************************************************************/
kaf24@11310 1806 /* Create a shadow of a given guest page.
kaf24@11310 1807 */
kaf24@11310 1808 static mfn_t
kaf24@11310 1809 sh_make_shadow(struct vcpu *v, mfn_t gmfn, u32 shadow_type)
kaf24@11310 1810 {
kaf24@11310 1811 mfn_t smfn = shadow_alloc(v->domain, shadow_type, mfn_x(gmfn));
kaf24@11310 1812 SHADOW_DEBUG(MAKE_SHADOW, "(%05lx, %u)=>%05lx\n",
kaf24@11310 1813 mfn_x(gmfn), shadow_type, mfn_x(smfn));
kaf24@11310 1814
Tim@12561 1815 if ( shadow_type != SH_type_l2_32_shadow
Tim@12561 1816 && shadow_type != SH_type_l2_pae_shadow
Tim@12561 1817 && shadow_type != SH_type_l2h_pae_shadow
Tim@12561 1818 && shadow_type != SH_type_l4_64_shadow )
kaf24@11310 1819         /* Lower-level shadow, not yet linked from a higher level */
Tim@12561 1820 mfn_to_shadow_page(smfn)->up = 0;
kaf24@11310 1821
Tim@12564 1822 #if GUEST_PAGING_LEVELS == 4
Tim@12564 1823 #if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL)
Tim@12564 1824 if ( shadow_type == SH_type_l4_64_shadow &&
Tim@13909 1825 unlikely(v->domain->arch.paging.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL) )
Tim@12564 1826 {
Tim@12564 1827 /* We're shadowing a new l4, but we've been assuming the guest uses
Tim@12564 1828 * only one l4 per vcpu and context switches using an l4 entry.
Tim@12564 1829 * Count the number of active l4 shadows. If there are enough
Tim@12564 1830 * of them, decide that this isn't an old linux guest, and stop
Tim@12564 1831 * pinning l3es. This is not very quick but it doesn't happen
Tim@12564 1832 * very often. */
Tim@12564 1833 struct list_head *l, *t;
Tim@12564 1834 struct shadow_page_info *sp;
Tim@12564 1835 struct vcpu *v2;
Tim@12564 1836 int l4count = 0, vcpus = 0;
Tim@13909 1837 list_for_each(l, &v->domain->arch.paging.shadow.pinned_shadows)
Tim@12564 1838 {
Tim@12564 1839 sp = list_entry(l, struct shadow_page_info, list);
Tim@12564 1840 if ( sp->type == SH_type_l4_64_shadow )
Tim@12564 1841 l4count++;
Tim@12564 1842 }
Tim@12564 1843 for_each_vcpu ( v->domain, v2 )
Tim@12564 1844 vcpus++;
Tim@12564 1845 if ( l4count > 2 * vcpus )
Tim@12564 1846 {
Tim@12564 1847 /* Unpin all the pinned l3 tables, and don't pin any more. */
Tim@13909 1848 list_for_each_safe(l, t, &v->domain->arch.paging.shadow.pinned_shadows)
Tim@12564 1849 {
Tim@12564 1850 sp = list_entry(l, struct shadow_page_info, list);
Tim@12564 1851 if ( sp->type == SH_type_l3_64_shadow )
Tim@12564 1852 sh_unpin(v, shadow_page_to_mfn(sp));
Tim@12564 1853 }
Tim@13909 1854 v->domain->arch.paging.shadow.opt_flags &= ~SHOPT_LINUX_L3_TOPLEVEL;
Tim@12564 1855 }
Tim@12564 1856 }
Tim@12564 1857 #endif
Tim@12564 1858 #endif
Tim@12564 1859
kaf24@11310 1860 // Create the Xen mappings...
kaf24@11310 1861 if ( !shadow_mode_external(v->domain) )
kaf24@11310 1862 {
kaf24@11310 1863 switch (shadow_type)
kaf24@11310 1864 {
kaf24@11310 1865 #if CONFIG_PAGING_LEVELS == 4 && GUEST_PAGING_LEVELS == 4
Tim@12561 1866 case SH_type_l4_shadow:
kaf24@11310 1867 sh_install_xen_entries_in_l4(v, gmfn, smfn); break;
kaf24@11310 1868 #endif
ack@14013 1869 #if CONFIG_PAGING_LEVELS >= 3 && GUEST_PAGING_LEVELS >= 3
Tim@12561 1870 case SH_type_l2h_shadow:
kaf24@11310 1871 sh_install_xen_entries_in_l2h(v, smfn); break;
kaf24@11310 1872 #endif
kaf24@11310 1873 default: /* Do nothing */ break;
kaf24@11310 1874 }
kaf24@11310 1875 }
Tim@13497 1876
kaf24@11310 1877 shadow_promote(v, gmfn, shadow_type);
kaf24@11310 1878 set_shadow_status(v, gmfn, shadow_type, smfn);
kaf24@11310 1879
kaf24@11310 1880 return smfn;
kaf24@11310 1881 }
kaf24@11310 1882
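/* A worked illustration of the SHOPT_LINUX_L3_TOPLEVEL heuristic above
 * (invented name, not called anywhere): with 2 vcpus, l3 pinning stays on
 * while at most 4 l4 shadows are live; a fifth live l4 shadow makes the
 * guest look non-Linux-like and pinning is switched off. */
static inline int linux_l3_pinning_plausible_example(int l4count, int vcpus)
{
    return l4count <= 2 * vcpus;
}
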
kaf24@11310 1883 /* Make a splintered superpage shadow */
kaf24@11310 1884 static mfn_t
kaf24@11310 1885 make_fl1_shadow(struct vcpu *v, gfn_t gfn)
kaf24@11310 1886 {
Tim@12561 1887 mfn_t smfn = shadow_alloc(v->domain, SH_type_fl1_shadow,
kaf24@11310 1888 (unsigned long) gfn_x(gfn));
kaf24@11310 1889
Tim@13909 1890 SHADOW_DEBUG(MAKE_SHADOW, "(%" SH_PRI_gfn ")=>%" PRI_mfn "\n",
kaf24@11310 1891 gfn_x(gfn), mfn_x(smfn));
kaf24@11310 1892
kaf24@11310 1893 set_fl1_shadow_status(v, gfn, smfn);
kaf24@11310 1894 return smfn;
kaf24@11310 1895 }
kaf24@11310 1896
kaf24@11310 1897
kaf24@11310 1898 #if SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS
kaf24@11310 1899 mfn_t
kaf24@11310 1900 sh_make_monitor_table(struct vcpu *v)
kaf24@11310 1901 {
Tim@13332 1902 struct domain *d = v->domain;
kaf24@11310 1903
kaf24@11310 1904 ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0);
kaf24@11310 1905
Tim@13332 1906 /* Guarantee we can get the memory we need */
keir@17025 1907 shadow_prealloc(d, SH_type_monitor_table, CONFIG_PAGING_LEVELS);
Tim@13332 1908
kaf24@11310 1909 #if CONFIG_PAGING_LEVELS == 4
kaf24@11310 1910 {
kaf24@11310 1911 mfn_t m4mfn;
Tim@12561 1912 m4mfn = shadow_alloc(d, SH_type_monitor_table, 0);
kaf24@11310 1913 sh_install_xen_entries_in_l4(v, m4mfn, m4mfn);
kaf24@11310 1914 /* Remember the level of this table */
kaf24@11310 1915 mfn_to_page(m4mfn)->shadow_flags = 4;
kaf24@11310 1916 #if SHADOW_PAGING_LEVELS < 4
kaf24@11310 1917 {
keir@17025 1918 mfn_t m3mfn, m2mfn;
kaf24@11310 1919 l4_pgentry_t *l4e;
keir@17025 1920 l3_pgentry_t *l3e;
keir@17025 1921 /* Install an l3 table and an l2 table that will hold the shadow
keir@17025 1922 * linear map entries. This overrides the linear map entry that
keir@17025 1923 * was installed by sh_install_xen_entries_in_l4. */
keir@17025 1924 l4e = sh_map_domain_page(m4mfn);
keir@17025 1925
Tim@12561 1926 m3mfn = shadow_alloc(d, SH_type_monitor_table, 0);
kaf24@11310 1927 mfn_to_page(m3mfn)->shadow_flags = 3;
keir@17025 1928 l4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)]
keir@17025 1929 = l4e_from_pfn(mfn_x(m3mfn), __PAGE_HYPERVISOR);
keir@17025 1930
keir@17025 1931 m2mfn = shadow_alloc(d, SH_type_monitor_table, 0);
keir@17025 1932 mfn_to_page(m2mfn)->shadow_flags = 2;
keir@17025 1933 l3e = sh_map_domain_page(m3mfn);
keir@17025 1934 l3e[0] = l3e_from_pfn(mfn_x(m2mfn), __PAGE_HYPERVISOR);
keir@17025 1935 sh_unmap_domain_page(l3e);
keir@17025 1936
kfraser@14974 1937 if ( is_pv_32on64_vcpu(v) )
ack@13311 1938 {
keir@17025 1939 /* For 32-on-64 PV guests, we need to map the 32-bit Xen
keir@17025 1940 * area into its usual VAs in the monitor tables */
keir@17025 1941 m3mfn = shadow_alloc(d, SH_type_monitor_table, 0);
keir@17025 1942 mfn_to_page(m3mfn)->shadow_flags = 3;
keir@17025 1943 l4e[0] = l4e_from_pfn(mfn_x(m3mfn), __PAGE_HYPERVISOR);
keir@17025 1944
ack@13311 1945 m2mfn = shadow_alloc(d, SH_type_monitor_table, 0);
ack@13311 1946 mfn_to_page(m2mfn)->shadow_flags = 2;
ack@13311 1947 l3e = sh_map_domain_page(m3mfn);
ack@13311 1948 l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT);
ack@13311 1949 sh_install_xen_entries_in_l2h(v, m2mfn);
ack@13311 1950 sh_unmap_domain_page(l3e);
ack@13311 1951 }
keir@17025 1952
keir@17025 1953 sh_unmap_domain_page(l4e);
kaf24@11310 1954 }
kaf24@11310 1955 #endif /* SHADOW_PAGING_LEVELS < 4 */
kaf24@11310 1956 return m4mfn;
kaf24@11310 1957 }
kaf24@11310 1958
kaf24@11310 1959 #elif CONFIG_PAGING_LEVELS == 3
kaf24@11310 1960
kaf24@11310 1961 {
kaf24@11310 1962 mfn_t m3mfn, m2mfn;
kaf24@11310 1963 l3_pgentry_t *l3e;
kaf24@11310 1964 l2_pgentry_t *l2e;
kaf24@11310 1965 int i;
kaf24@11310 1966
Tim@12561 1967 m3mfn = shadow_alloc(d, SH_type_monitor_table, 0);
kaf24@11310 1968 /* Remember the level of this table */
kaf24@11310 1969 mfn_to_page(m3mfn)->shadow_flags = 3;
kaf24@11310 1970
kaf24@11310 1971 // Install a monitor l2 table in slot 3 of the l3 table.
kaf24@11310 1972 // This is used for all Xen entries, including linear maps
Tim@12561 1973 m2mfn = shadow_alloc(d, SH_type_monitor_table, 0);
kaf24@11310 1974 mfn_to_page(m2mfn)->shadow_flags = 2;
kaf24@11310 1975 l3e = sh_map_domain_page(m3mfn);
kaf24@11310 1976 l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT);
kaf24@11310 1977 sh_install_xen_entries_in_l2h(v, m2mfn);
kaf24@11310 1978 /* Install the monitor's own linear map */
kaf24@11310 1979 l2e = sh_map_domain_page(m2mfn);
kaf24@11310 1980 for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
kaf24@11310 1981 l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
kaf24@11310 1982 (l3e_get_flags(l3e[i]) & _PAGE_PRESENT)
kaf24@11310 1983 ? l2e_from_pfn(l3e_get_pfn(l3e[i]), __PAGE_HYPERVISOR)
kaf24@11310 1984 : l2e_empty();
kaf24@11310 1985 sh_unmap_domain_page(l2e);
kaf24@11310 1986 sh_unmap_domain_page(l3e);
kaf24@11310 1987
kaf24@11310 1988 SHADOW_PRINTK("new monitor table: %#lx\n", mfn_x(m3mfn));
kaf24@11310 1989 return m3mfn;
kaf24@11310 1990 }
kaf24@11310 1991
kaf24@11310 1992 #else
kaf24@11310 1993 #error this should not happen
kaf24@11310 1994 #endif /* CONFIG_PAGING_LEVELS */
kaf24@11310 1995 }
kaf24@11310 1996 #endif /* SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS */
kaf24@11310 1997
kaf24@11310 1998 /**************************************************************************/
kaf24@11310 1999 /* These functions also take a virtual address and return the level-N
kaf24@11310 2000 * shadow table mfn and entry, but they create the shadow pagetables if
kaf24@11310 2001 * they are needed. The "demand" argument is non-zero when handling
kaf24@11310 2002 * a demand fault (so we know what to do about accessed bits &c).
kaf24@11310 2003 * If the necessary tables are not present in the guest, they return NULL. */
tim@11786 2004
tim@11786 2005 /* N.B. The use of GUEST_PAGING_LEVELS here is correct. If the shadow has
tim@11786 2006 * more levels than the guest, the upper levels are always fixed and do not
tim@11786 2007 * reflect any information from the guest, so we do not use these functions
tim@11786 2008 * to access them. */
tim@11786 2009
kaf24@11310 2010 #if GUEST_PAGING_LEVELS >= 4
kaf24@11310 2011 static shadow_l4e_t * shadow_get_and_create_l4e(struct vcpu *v,
kaf24@11310 2012 walk_t *gw,
kaf24@11310 2013 mfn_t *sl4mfn)
kaf24@11310 2014 {
kaf24@11310 2015 /* There is always a shadow of the top level table. Get it. */
Tim@11867 2016 *sl4mfn = pagetable_get_mfn(v->arch.shadow_table[0]);
kaf24@11310 2017 /* Reading the top level table is always valid. */
kaf24@11310 2018 return sh_linear_l4_table(v) + shadow_l4_linear_offset(gw->va);
kaf24@11310 2019 }
Tim@11867 2020
kaf24@11310 2021 static shadow_l3e_t * shadow_get_and_create_l3e(struct vcpu *v,
kaf24@11310 2022 walk_t *gw,
kaf24@11310 2023 mfn_t *sl3mfn,
kaf24@11310 2024 fetch_type_t ft)
kaf24@11310 2025 {
kaf24@11310 2026 mfn_t sl4mfn;
kaf24@11310 2027 shadow_l4e_t *sl4e;
Tim@12603 2028 if ( !mfn_valid(gw->l3mfn) ) return NULL; /* No guest page. */
kaf24@11310 2029 /* Get the l4e */
kaf24@11310 2030 sl4e = shadow_get_and_create_l4e(v, gw, &sl4mfn);
kaf24@11310 2031 ASSERT(sl4e != NULL);
kaf24@11310 2032 if ( shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT )
kaf24@11310 2033 {
kaf24@11310 2034 *sl3mfn = shadow_l4e_get_mfn(*sl4e);
Tim@12603 2035 ASSERT(mfn_valid(*sl3mfn));
kaf24@11310 2036 }
kaf24@11310 2037 else
kaf24@11310 2038 {
kaf24@11310 2039 int r;
kaf24@11310 2040 shadow_l4e_t new_sl4e;
kaf24@11310 2041 /* No l3 shadow installed: find and install it. */
Tim@12561 2042 *sl3mfn = get_shadow_status(v, gw->l3mfn, SH_type_l3_shadow);
Tim@12603 2043 if ( !mfn_valid(*sl3mfn) )
kaf24@11310 2044 {
kaf24@11310 2045 /* No l3 shadow of this page exists at all: make one. */
Tim@12561 2046 *sl3mfn = sh_make_shadow(v, gw->l3mfn, SH_type_l3_shadow);
kaf24@11310 2047 }
kaf24@11310 2048 /* Install the new sl3 table in the sl4e */
Tim@16313 2049 l4e_propagate_from_guest(v, gw->l4e, *sl3mfn, &new_sl4e, ft);
kaf24@11310 2050 r = shadow_set_l4e(v, sl4e, new_sl4e, sl4mfn);
kaf24@11310 2051 ASSERT((r & SHADOW_SET_FLUSH) == 0);
Tim@12563 2052 if ( r & SHADOW_SET_ERROR )
Tim@12563 2053 return NULL;
kaf24@11310 2054 }
kaf24@11310 2055 /* Now follow it down a level. Guaranteed to succeed. */
kaf24@11310 2056 return sh_linear_l3_table(v) + shadow_l3_linear_offset(gw->va);
Tim@11867 2057 }
kaf24@11310 2058 #endif /* GUEST_PAGING_LEVELS >= 4 */
kaf24@11310 2059
kaf24@11310 2060
kaf24@11310 2061 static shadow_l2e_t * shadow_get_and_create_l2e(struct vcpu *v,
kaf24@11310 2062 walk_t *gw,
kaf24@11310 2063 mfn_t *sl2mfn,
kaf24@11310 2064 fetch_type_t ft)
kaf24@11310 2065 {
Tim@11867 2066 #if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
kaf24@11310 2067 mfn_t sl3mfn = _mfn(INVALID_MFN);
kaf24@11310 2068 shadow_l3e_t *sl3e;
Tim@12603 2069 if ( !mfn_valid(gw->l2mfn) ) return NULL; /* No guest page. */
kaf24@11310 2070 /* Get the l3e */
kaf24@11310 2071 sl3e = shadow_get_and_create_l3e(v, gw, &sl3mfn, ft);
Tim@12563 2072 if ( sl3e == NULL ) return NULL;
kaf24@11310 2073 if ( shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT )
kaf24@11310 2074 {
kaf24@11310 2075 *sl2mfn = shadow_l3e_get_mfn(*sl3e);
Tim@12603 2076 ASSERT(mfn_valid(*sl2mfn));
kaf24@11310 2077 }
kaf24@11310 2078 else
kaf24@11310 2079 {
kaf24@11310 2080 int r;
kaf24@11310 2081 shadow_l3e_t new_sl3e;
ack@14013 2082 unsigned int t = SH_type_l2_shadow;
ack@14013 2083
ack@14013 2084 /* Tag compat L2 containing hypervisor (m2p) mappings */
kfraser@14974 2085 if ( is_pv_32on64_domain(v->domain) &&
ack@14013 2086 guest_l4_table_offset(gw->va) == 0 &&
ack@14013 2087 guest_l3_table_offset(gw->va) == 3 )
ack@14013 2088 t = SH_type_l2h_shadow;
Tim@14962 2089
kaf24@11310 2090 /* No l2 shadow installed: find and install it. */
ack@14013 2091 *sl2mfn = get_shadow_status(v, gw->l2mfn, t);
Tim@12603 2092 if ( !mfn_valid(*sl2mfn) )
kaf24@11310 2093 {
kaf24@11310 2094 /* No l2 shadow of this page exists at all: make one. */
ack@14013 2095 *sl2mfn = sh_make_shadow(v, gw->l2mfn, t);
kaf24@11310 2096 }
kaf24@11310 2097 /* Install the new sl2 table in the sl3e */
Tim@16313 2098 l3e_propagate_from_guest(v, gw->l3e, *sl2mfn, &new_sl3e, ft);
kaf24@11310 2099 r = shadow_set_l3e(v, sl3e, new_sl3e, sl3mfn);
kaf24@11310 2100 ASSERT((r & SHADOW_SET_FLUSH) == 0);
Tim@12563 2101 if ( r & SHADOW_SET_ERROR )
Tim@12563 2102 return NULL;
kaf24@11310 2103 }
kaf24@11310 2104 /* Now follow it down a level. Guaranteed to succeed. */
kaf24@11310 2105 return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
Tim@11867 2106 #elif GUEST_PAGING_LEVELS == 3 /* PAE... */
Tim@11867 2107 /* We never demand-shadow PAE l3es: they are only created in
Tim@11867 2108 * sh_update_cr3(). Check if the relevant sl3e is present. */
Tim@13909 2109 shadow_l3e_t *sl3e = ((shadow_l3e_t *)&v->arch.paging.shadow.l3table)
Tim@11867 2110 + shadow_l3_linear_offset(gw->va);
Tim@11867 2111 if ( !(shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) )
Tim@11867 2112 return NULL;
Tim@11867 2113 *sl2mfn = shadow_l3e_get_mfn(*sl3e);
Tim@12603 2114 ASSERT(mfn_valid(*sl2mfn));
Tim@11867 2115 return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
kaf24@11310 2116 #else /* 32bit... */
kaf24@11310 2117 /* There is always a shadow of the top level table. Get it. */
Tim@11867 2118 *sl2mfn = pagetable_get_mfn(v->arch.shadow_table[0]);
kaf24@11310 2119 /* This next line is important: the guest l2 has a 16k
kaf24@11310 2120 * shadow, we need to return the right mfn of the four. This
kaf24@11310 2121 * call will set it for us as a side-effect. */
Tim@16313 2122 (void) shadow_l2_index(sl2mfn, guest_l2_table_offset(gw->va));
kaf24@11310 2123 /* Reading the top level table is always valid. */
kaf24@11310 2124 return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
kaf24@11310 2125 #endif
kaf24@11310 2126 }
kaf24@11310 2127
kaf24@11310 2128
kaf24@11310 2129 static shadow_l1e_t * shadow_get_and_create_l1e(struct vcpu *v,
kaf24@11310 2130 walk_t *gw,
kaf24@11310 2131 mfn_t *sl1mfn,
kaf24@11310 2132 fetch_type_t ft)
kaf24@11310 2133 {
kaf24@11310 2134 mfn_t sl2mfn;
kaf24@11310 2135 shadow_l2e_t *sl2e;
kaf24@11310 2136
kaf24@11310 2137 /* Get the l2e */
kaf24@11310 2138 sl2e = shadow_get_and_create_l2e(v, gw, &sl2mfn, ft);
kaf24@11310 2139 if ( sl2e == NULL ) return NULL;
tim@11688 2140 /* Install the sl1 in the l2e if it wasn't there or if we need to
tim@11688 2141 * re-do it to fix a PSE dirty bit. */
tim@11688 2142 if ( shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT
tim@11688 2143 && likely(ft != ft_demand_write
Tim@16313 2144 || (shadow_l2e_get_flags(*sl2e) & _PAGE_RW)
Tim@16313 2145 || !(guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)) )
kaf24@11310 2146 {
kaf24@11310 2147 *sl1mfn = shadow_l2e_get_mfn(*sl2e);
Tim@12603 2148 ASSERT(mfn_valid(*sl1mfn));
kaf24@11310 2149 }
kaf24@11310 2150 else
kaf24@11310 2151 {
kaf24@11310 2152 shadow_l2e_t new_sl2e;
Tim@16313 2153 int r, flags = guest_l2e_get_flags(gw->l2e);
kaf24@11310 2154 /* No l1 shadow installed: find and install it. */
kaf24@11310 2155 if ( !(flags & _PAGE_PRESENT) )
kaf24@11310 2156 return NULL; /* No guest page. */
kaf24@11310 2157 if ( guest_supports_superpages(v) && (flags & _PAGE_PSE) )
kaf24@11310 2158 {
kaf24@11310 2159 /* Splintering a superpage */
Tim@16313 2160 gfn_t l2gfn = guest_l2e_get_gfn(gw->l2e);
kaf24@11310 2161 *sl1mfn = get_fl1_shadow_status(v, l2gfn);
Tim@12603 2162 if ( !mfn_valid(*sl1mfn) )
kaf24@11310 2163 {
kaf24@11310 2164 /* No fl1 shadow of this superpage exists at all: make one. */
kaf24@11310 2165 *sl1mfn = make_fl1_shadow(v, l2gfn);
kaf24@11310 2166 }
kaf24@11310 2167 }
kaf24@11310 2168 else
kaf24@11310 2169 {
kaf24@11310 2170 /* Shadowing an actual guest l1 table */
keir@17510 2171 if ( !mfn_valid(gw->l1mfn) ) return NULL; /* No guest page. */
Tim@12561 2172 *sl1mfn = get_shadow_status(v, gw->l1mfn, SH_type_l1_shadow);
Tim@12603 2173 if ( !mfn_valid(*sl1mfn) )
kaf24@11310 2174 {
kaf24@11310 2175 /* No l1 shadow of this page exists at all: make one. */
Tim@12561 2176 *sl1mfn = sh_make_shadow(v, gw->l1mfn, SH_type_l1_shadow);
kaf24@11310 2177 }
kaf24@11310 2178 }
kaf24@11310 2179 /* Install the new sl1 table in the sl2e */
Tim@16313 2180 l2e_propagate_from_guest(v, gw->l2e, *sl1mfn, &new_sl2e, ft);
kaf24@11310 2181 r = shadow_set_l2e(v, sl2e, new_sl2e, sl2mfn);
kaf24@11310 2182 ASSERT((r & SHADOW_SET_FLUSH) == 0);
Tim@12563 2183 if ( r & SHADOW_SET_ERROR )
Tim@12563 2184 return NULL;
kaf24@11310 2185 /* This next line is important: in 32-on-PAE and 32-on-64 modes,
kaf24@11310 2186 * the guest l1 table has an 8k shadow, and we need to return
kaf24@11310 2187 * the right mfn of the pair. This call will set it for us as a
kaf24@11310 2188 * side-effect. (In all other cases, it's a no-op and will be
kaf24@11310 2189 * compiled out.) */
kaf24@11310 2190 (void) shadow_l1_index(sl1mfn, guest_l1_table_offset(gw->va));
kaf24@11310 2191 }
kaf24@11310 2192 /* Now follow it down a level. Guaranteed to succeed. */
kaf24@11310 2193 return sh_linear_l1_table(v) + shadow_l1_linear_offset(gw->va);
kaf24@11310 2194 }
kaf24@11310 2195
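/* An illustrative sketch (invented wrapper, not called anywhere) of how
 * the demand-fault path uses the chain above: with a walk_t already
 * filled in by the guest-pagetable walker, a single call creates any
 * missing intermediate shadows on the way down and returns a pointer to
 * the shadow l1e, or NULL if the guest tables are incomplete or an
 * intermediate shadow_set_lNe() reported SHADOW_SET_ERROR.  Assumes the
 * caller already holds the shadow lock, as the real callers do. */
static inline shadow_l1e_t *
demand_write_sl1e_example(struct vcpu *v, walk_t *gw, mfn_t *sl1mfn)
{
    return shadow_get_and_create_l1e(v, gw, sl1mfn, ft_demand_write);
}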
kaf24@11310 2196
kaf24@11310 2197
kaf24@11310 2198 /**************************************************************************/
kaf24@11310 2199 /* Destructors for shadow tables:
kaf24@11310 2200 * Unregister the shadow, decrement refcounts of any entries present in it,
kaf24@11310 2201 * and release the memory.
kaf24@11310 2202 *
kaf24@11310 2203 * N.B. These destructors do not clear the contents of the shadows.
kaf24@11310 2204 * This allows us to delay TLB shootdowns until the page is being reused.
kaf24@11310 2205 * See shadow_alloc() and shadow_free() for how this is handled.
kaf24@11310 2206 */
kaf24@11310 2207
kaf24@11310 2208 #if GUEST_PAGING_LEVELS >= 4
kaf24@11310 2209 void sh_destroy_l4_shadow(struct vcpu *v, mfn_t smfn)
kaf24@11310 2210 {
kaf24@11310 2211 shadow_l4e_t *sl4e;
Tim@12561 2212 u32 t = mfn_to_shadow_page(smfn)->type;
kaf24@11310 2213 mfn_t gmfn, sl4mfn;
kaf24@11310 2214
kaf24@11310 2215 SHADOW_DEBUG(DESTROY_SHADOW,
kaf24@11310 2216 "%s(%05lx)\n", __func__, mfn_x(smfn));
Tim@12561 2217 ASSERT(t == SH_type_l4_shadow);
kaf24@11310 2218
kaf24@11310 2219 /* Record that the guest page isn't shadowed any more (in this type) */
Tim@12561 2220 gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
kaf24@11310 2221 delete_shadow_status(v, gmfn, t, smfn);
kaf24@11310 2222 shadow_demote(v, gmfn, t);
kaf24@11310 2223 /* Decrement refcounts of all the old entries */
kaf24@11310 2224 sl4mfn = smfn;
ack@14013 2225 SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, 0, v->domain, {
kaf24@11310 2226 if ( shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT )
kaf24@11310 2227 {
kaf24@11310 2228 sh_put_ref(v, shadow_l4e_get_mfn(*sl4e),
Tim@12564 2229 (((paddr_t)mfn_x(sl4mfn)) << PAGE_SHIFT)
Tim@12564 2230 | ((unsigned long)sl4e & ~PAGE_MASK));
kaf24@11310 2231 }
kaf24@11310 2232 });
kaf24@11310 2233
kaf24@11310 2234 /* Put the memory back in the pool */
kaf24@11310 2235 shadow_free(v->domain, smfn);
kaf24@11310 2236 }
Tim@11867 2237
kaf24@11310 2238 void sh_destroy_l3_shadow(struct vcpu *v, mfn_t smfn)
kaf24@11310 2239 {
kaf24@11310 2240 shadow_l3e_t *sl3e;
Tim@12561 2241 u32 t = mfn_to_shadow_page(smfn)->type;
kaf24@11310 2242 mfn_t gmfn, sl3mfn;
kaf24@11310 2243
kaf24@11310 2244 SHADOW_DEBUG(DESTROY_SHADOW,
kaf24@11310 2245 "%s(%05lx)\n", __func__, mfn_x(smfn));
Tim@12561 2246 ASSERT(t == SH_type_l3_shadow);
kaf24@11310 2247
kaf24@11310 2248 /* Record that the guest page isn't shadowed any more (in this type) */
Tim@12561 2249 gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
kaf24@11310 2250 delete_shadow_status(v, gmfn, t, smfn);
kaf24@11310 2251 shadow_demote(v, gmfn, t);
kaf24@11310 2252
kaf24@11310 2253 /* Decrement refcounts of all the old entries */
kaf24@11310 2254 sl3mfn = smfn;
kaf24@11310 2255 SHADOW_FOREACH_L3E(sl3mfn, sl3e, 0, 0, {
kaf24@11310 2256 if ( shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT )
kaf24@11310 2257 sh_put_ref(v, shadow_l3e_get_mfn(*sl3e),
kaf24@11310 2258 (((paddr_t)mfn_x(sl3mfn)) << PAGE_SHIFT)
kaf24@11310 2259 | ((unsigned long)sl3e & ~PAGE_MASK));
kaf24@11310 2260 });
kaf24@11310 2261
kaf24@11310 2262 /* Put the memory back in the pool */
kaf24@11310 2263 shadow_free(v->domain, smfn);
kaf24@11310 2264 }
Tim@11867 2265 #endif /* GUEST_PAGING_LEVELS >= 4 */
Tim@11867 2266
kaf24@11310 2267
kaf24@11310 2268 void sh_destroy_l2_shadow(struct vcpu *v, mfn_t smfn)
kaf24@11310 2269 {
kaf24@11310 2270 shadow_l2e_t *sl2e;
Tim@12561 2271 u32 t = mfn_to_shadow_page(smfn)->type;
kaf24@11310 2272 mfn_t gmfn, sl2mfn;
kaf24@11310 2273
kaf24@11310 2274 SHADOW_DEBUG(DESTROY_SHADOW,
kaf24@11310 2275 "%s(%05lx)\n", __func__, mfn_x(smfn));
ack@14013 2276
ack@14013 2277 #if GUEST_PAGING_LEVELS >= 3
ack@14013 2278 ASSERT(t == SH_type_l2_shadow || t == SH_type_l2h_shadow);
ack@14013 2279 #else
ack@14013 2280 ASSERT(t == SH_type_l2_shadow);
ack@14013 2281 #endif
kaf24@11310 2282
kaf24@11310 2283 /* Record that the guest page isn't shadowed any more (in this type) */
Tim@12561 2284 gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
kaf24@11310 2285 delete_shadow_status(v, gmfn, t, smfn);
kaf24@11310 2286 shadow_demote(v, gmfn, t);
kaf24@11310 2287
kaf24@11310 2288 /* Decrement refcounts of all the old entries */
kaf24@11310 2289 sl2mfn = smfn;
ack@14013 2290 SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, v->domain, {
kaf24@11310 2291 if ( shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT )
kaf24@11310 2292 sh_put_ref(v, shadow_l2e_get_mfn(*sl2e),
kaf24@11310 2293 (((paddr_t)mfn_x(sl2mfn)) << PAGE_SHIFT)
kaf24@11310 2294 | ((unsigned long)sl2e & ~PAGE_MASK));
kaf24@11310 2295 });
kaf24@11310 2296
kaf24@11310 2297 /* Put the memory back in the pool */
kaf24@11310 2298 shadow_free(v->domain, smfn);
kaf24@11310 2299 }
kaf24@11310 2300
kaf24@11310 2301 void sh_destroy_l1_shadow(struct vcpu *v, mfn_t smfn)
kaf24@11310 2302 {
kaf24@11310 2303 struct domain *d = v->domain;
kaf24@11310 2304 shadow_l1e_t *sl1e;
Tim@12561 2305 u32 t = mfn_to_shadow_page(smfn)->type;
kaf24@11310 2306
kaf24@11310 2307 SHADOW_DEBUG(DESTROY_SHADOW,
kaf24@11310 2308 "%s(%05lx)\n", __func__, mfn_x(smfn));
Tim@12561 2309 ASSERT(t == SH_type_l1_shadow || t == SH_type_fl1_shadow);
kaf24@11310 2310
kaf24@11310 2311 /* Record that the guest page isn't shadowed any more (in this type) */
Tim@12561 2312 if ( t == SH_type_fl1_shadow )
kaf24@11310 2313 {
Tim@12561 2314 gfn_t gfn = _gfn(mfn_to_shadow_page(smfn)->backpointer);
kaf24@11310 2315 delete_fl1_shadow_status(v, gfn, smfn);
kaf24@11310 2316 }
kaf24@11310 2317 else
kaf24@11310 2318 {
Tim@12561 2319 mfn_t gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
kaf24@11310 2320 delete_shadow_status(v, gmfn, t, smfn);
kaf24@11310 2321 shadow_demote(v, gmfn, t);
kaf24@11310 2322 }
kaf24@11310 2323
kaf24@11310 2324 if ( shadow_mode_refcounts(d) )
kaf24@11310 2325 {
kaf24@11310 2326 /* Decrement refcounts of all the old entries */
kaf24@11310 2327 mfn_t sl1mfn = smfn;
kaf24@11310 2328 SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
Tim@12069 2329 if ( (shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT)
keir@17571 2330 && !sh_l1e_is_magic(*sl1e) ) {
keir@17571 2331 shadow_vram_put_l1e(*sl1e, sl1e, sl1mfn, d);
kaf24@11310 2332 shadow_put_page_from_l1e(*sl1e, d);
keir@17571 2333 }
kaf24@11310 2334 });
kaf24@11310 2335 }
kaf24@11310 2336
kaf24@11310 2337 /* Put the memory back in the pool */
kaf24@11310 2338 shadow_free(v->domain, smfn);
kaf24@11310 2339 }
kaf24@11310 2340
kaf24@11310 2341 #if SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS
kaf24@11310 2342 void sh_destroy_monitor_table(struct vcpu *v, mfn_t mmfn)
kaf24@11310 2343 {
kaf24@11310 2344 struct domain *d = v->domain;
Tim@12561 2345 ASSERT(mfn_to_shadow_page(mmfn)->type == SH_type_monitor_table);
kaf24@11310 2346
kaf24@11310 2347 #if (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS != 4)
kaf24@11310 2348 {
Tim@13332 2349 mfn_t m3mfn;
kaf24@11310 2350 l4_pgentry_t *l4e = sh_map_domain_page(mmfn);
keir@17025 2351 l3_pgentry_t *l3e;
keir@17025 2352 int linear_slot = shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START);
keir@17025 2353
keir@17025 2354 /* Need to destroy the l3 and l2 monitor pages used
keir@17025 2355 * for the linear map */
keir@17025 2356 ASSERT(l4e_get_flags(l4e[linear_slot]) & _PAGE_PRESENT);
keir@17025 2357 m3mfn = _mfn(l4e_get_pfn(l4e[linear_slot]));
keir@17025 2358 l3e = sh_map_domain_page(m3mfn);
keir@17025 2359 ASSERT(l3e_get_flags(l3e[0]) & _PAGE_PRESENT);
keir@17025 2360 shadow_free(d, _mfn(l3e_get_pfn(l3e[0])));
keir@17025 2361 sh_unmap_domain_page(l3e);
keir@17025 2362 shadow_free(d, m3mfn);
keir@17025 2363
kfraser@14974 2364 if ( is_pv_32on64_vcpu(v) )
ack@13311 2365 {
keir@17025 2366 /* Need to destroy the l3 and l2 monitor pages that map the
keir@17025 2367 * Xen VAs at 3GB-4GB */
keir@17025 2368 ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT);
keir@17025 2369 m3mfn = _mfn(l4e_get_pfn(l4e[0]));
keir@17025 2370 l3e = sh_map_domain_page(m3mfn);
ack@13311 2371 ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
ack@13311 2372 shadow_free(d, _mfn(l3e_get_pfn(l3e[3])));
ack@13311 2373 sh_unmap_domain_page(l3e);
keir@17025 2374 shadow_free(d, m3mfn);
ack@13311 2375 }
kaf24@11310 2376 sh_unmap_domain_page(l4e);
kaf24@11310 2377 }
kaf24@11310 2378 #elif CONFIG_PAGING_LEVELS == 3
kaf24@11310 2379     /* Need to destroy the l2 monitor page in slot 3 too */
kaf24@11310 2380 {
kaf24@11310 2381 l3_pgentry_t *l3e = sh_map_domain_page(mmfn);
kaf24@11310 2382 ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
kaf24@11310 2383 shadow_free(d, _mfn(l3e_get_pfn(l3e[3])));
kaf24@11310 2384 sh_unmap_domain_page(l3e);
kaf24@11310 2385 }
kaf24@11310 2386 #endif
kaf24@11310 2387
kaf24@11310 2388 /* Put the memory back in the pool */
kaf24@11310 2389 shadow_free(d, mmfn);
kaf24@11310 2390 }
kaf24@11310 2391 #endif
kaf24@11310 2392
kaf24@11310 2393 /**************************************************************************/
kaf24@11310 2394 /* Functions to destroy non-Xen mappings in a pagetable hierarchy.
kaf24@11310 2395 * These are called from common code when we are running out of shadow
kaf24@11310 2396 * memory, and unpinning all the top-level shadows hasn't worked.
kaf24@11310 2397 *
kaf24@11310 2398 * This implementation is pretty crude and slow, but we hope that it won't
kaf24@11310 2399 * be called very often. */
kaf24@11310 2400
kaf24@11310 2401 #if GUEST_PAGING_LEVELS == 2
kaf24@11310 2402
kaf24@11310 2403 void sh_unhook_32b_mappings(struct vcpu *v, mfn_t sl2mfn)
kaf24@11310 2404 {
kaf24@11310 2405 shadow_l2e_t *sl2e;
ack@14013 2406 SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, v->domain, {
kaf24@11310 2407 (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
kaf24@11310 2408 });
kaf24@11310 2409 }
kaf24@11310 2410
kaf24@11310 2411 #elif GUEST_PAGING_LEVELS == 3
kaf24@11310 2412
Tim@11867 2413 void sh_unhook_pae_mappings(struct vcpu *v, mfn_t sl2mfn)
Tim@11867 2414 /* Walk a PAE l2 shadow, unhooking entries from all the subshadows */
kaf24@11310 2415 {
Tim@11867 2416 shadow_l2e_t *sl2e;
ack@14013 2417 SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, v->domain, {
Tim@11867 2418 (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
kaf24@11310 2419 });
kaf24@11310 2420 }
kaf24@11310 2421
kaf24@11310 2422 #elif GUEST_PAGING_LEVELS == 4
kaf24@11310 2423
kaf24@11310 2424 void sh_unhook_64b_mappings(struct vcpu *v, mfn_t sl4mfn)
kaf24@11310 2425 {
kaf24@11310 2426 shadow_l4e_t *sl4e;
ack@14013 2427 SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, 0, v->domain, {
kaf24@11310 2428 (void) shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn);
kaf24@11310 2429 });
kaf24@11310 2430 }
kaf24@11310 2431
kaf24@11310 2432 #endif
kaf24@11310 2433
kaf24@11310 2434 /**************************************************************************/
kaf24@11310 2435 /* Internal translation functions.
kaf24@11310 2436 * These functions require a pointer to the shadow entry that will be updated.
kaf24@11310 2437 */
kaf24@11310 2438
kaf24@11310 2439 /* These functions take a new guest entry, translate it to shadow and write
kaf24@11310 2440 * the shadow entry.
kaf24@11310 2441 *
kaf24@11310 2442 * They return the same bitmaps as the shadow_set_lXe() functions.
kaf24@11310 2443 */
kaf24@11310 2444
kaf24@11310 2445 #if GUEST_PAGING_LEVELS >= 4
kaf24@11310 2446 static int validate_gl4e(struct vcpu *v, void *new_ge, mfn_t sl4mfn, void *se)
kaf24@11310 2447 {
kaf24@11310 2448 shadow_l4e_t new_sl4e;
Tim@16313 2449 guest_l4e_t new_gl4e = *(guest_l4e_t *)new_ge;
kaf24@11310 2450 shadow_l4e_t *sl4p = se;
kaf24@11310 2451 mfn_t sl3mfn = _mfn(INVALID_MFN);
Tim@15812 2452 struct domain *d = v->domain;
Tim@15863 2453 p2m_type_t p2mt;
kaf24@11310 2454 int result = 0;
kaf24@11310 2455
kfraser@14595 2456 perfc_incr(shadow_validate_gl4e_calls);
kaf24@11310 2457
Tim@16313 2458 if ( guest_l4e_get_flags(new_gl4e) & _PAGE_PRESENT )
kaf24@11310 2459 {
Tim@16313 2460 gfn_t gl3gfn = guest_l4e_get_gfn(new_gl4e);
Tim@15863 2461 mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn, &p2mt);
Tim@15863 2462 if ( p2m_is_ram(p2mt) )
Tim@12561 2463 sl3mfn = get_shadow_status(v, gl3mfn, SH_type_l3_shadow);
kaf24@11310 2464 else
kaf24@11310 2465 result |= SHADOW_SET_ERROR;
kaf24@11310 2466 }
Tim@16313 2467 l4e_propagate_from_guest(v, new_gl4e, sl3mfn, &new_sl4e, ft_prefetch);
tim@11666 2468
tim@11666 2469 // check for updates to xen reserved slots
Tim@15812 2470 if ( !shadow_mode_external(d) )
tim@11666 2471 {
tim@11666 2472 int shadow_index = (((unsigned long)sl4p & ~PAGE_MASK) /
tim@11666 2473 sizeof(shadow_l4e_t));
Tim@15812 2474 int reserved_xen_slot = !is_guest_l4_slot(d, shadow_index);
tim@11666 2475
tim@11666 2476 if ( unlikely(reserved_xen_slot) )
tim@11666 2477 {
tim@11666 2478 // attempt by the guest to write to a xen reserved slot
tim@11666 2479 //
tim@11666 2480 SHADOW_PRINTK("%s out-of-range update "
tim@11666 2481 "sl4mfn=%05lx index=0x%x val=%" SH_PRI_pte "\n",
tim@11666 2482 __func__, mfn_x(sl4mfn), shadow_index, new_sl4e.l4);
tim@11666 2483 if ( shadow_l4e_get_flags(new_sl4e) & _PAGE_PRESENT )
tim@11666 2484 {
tim@11666 2485 SHADOW_ERROR("out-of-range l4e update\n");
tim@11666 2486 result |= SHADOW_SET_ERROR;
tim@11666 2487 }
tim@11666 2488
tim@11666 2489 // do not call shadow_set_l4e...
tim@11666 2490 return result;
tim@11666 2491 }
tim@11666 2492 }
tim@11666 2493
kaf24@11310 2494 result |= shadow_set_l4e(v, sl4p, new_sl4e, sl4mfn);
kaf24@11310 2495 return result;
kaf24@11310 2496 }
Tim@11867 2497
Tim@11867 2498
kaf24@11310 2499 static int validate_gl3e(struct vcpu *v, void *new_ge, mfn_t sl3mfn, void *se)
kaf24@11310 2500 {
kaf24@11310 2501 shadow_l3e_t new_sl3e;
Tim@16313 2502 guest_l3e_t new_gl3e = *(guest_l3e_t *)new_ge;
kaf24@11310 2503 shadow_l3e_t *sl3p = se;
kaf24@11310 2504 mfn_t sl2mfn = _mfn(INVALID_MFN);
Tim@15863 2505 p2m_type_t p2mt;
kaf24@11310 2506 int result = 0;
kaf24@11310 2507
kfraser@14595 2508 perfc_incr(shadow_validate_gl3e_calls);
kaf24@11310 2509
Tim@16313 2510 if ( guest_l3e_get_flags(new_gl3e) & _PAGE_PRESENT )
kaf24@11310 2511 {
Tim@16313 2512 gfn_t gl2gfn = guest_l3e_get_gfn(new_gl3e);
Tim@15863 2513 mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn, &p2mt);
Tim@15863 2514 if ( p2m_is_ram(p2mt) )
Tim@12561 2515 sl2mfn = get_shadow_status(v, gl2mfn, SH_type_l2_shadow);
kaf24@11310 2516 else
kaf24@11310 2517 result |= SHADOW_SET_ERROR;
kaf24@11310 2518 }
Tim@16313 2519 l3e_propagate_from_guest(v, new_gl3e, sl2mfn, &new_sl3e, ft_prefetch);
kaf24@11310 2520 result |= shadow_set_l3e(v, sl3p, new_sl3e, sl3mfn);
kaf24@11310 2521
kaf24@11310 2522 return result;
kaf24@11310 2523 }
Tim@11867 2524 #endif // GUEST_PAGING_LEVELS >= 4
kaf24@11310 2525
kaf24@11310 2526 static int validate_gl2e(struct vcpu *v, void *new_ge, mfn_t sl2mfn, void *se)
kaf24@11310 2527 {
kaf24@11310 2528 shadow_l2e_t new_sl2e;
Tim@16313 2529 guest_l2e_t new_gl2e = *(guest_l2e_t *)new_ge;
kaf24@11310 2530 shadow_l2e_t *sl2p = se;
kaf24@11310 2531 mfn_t sl1mfn = _mfn(INVALID_MFN);
Tim@15863 2532 p2m_type_t p2mt;
kaf24@11310 2533 int result = 0;
kaf24@11310 2534
kfraser@14595 2535 perfc_incr(shadow_validate_gl2e_calls);
kaf24@11310 2536
Tim@16313 2537 if ( guest_l2e_get_flags(new_gl2e) & _PAGE_PRESENT )
kaf24@11310 2538 {
Tim@16313 2539 gfn_t gl1gfn = guest_l2e_get_gfn(new_gl2e);
kaf24@11310 2540 if ( guest_supports_superpages(v) &&
Tim@16313 2541 (guest_l2e_get_flags(new_gl2e) & _PAGE_PSE) )
kaf24@11310 2542 {
kaf24@11310 2543 // superpage -- need to look up the shadow L1 which holds the
kaf24@11310 2544 // splitters...
kaf24@11310 2545 sl1mfn = get_fl1_shadow_status(v, gl1gfn);
kaf24@11310 2546 #if 0
kaf24@11310 2547 // XXX - it's possible that we want to do some kind of prefetch
kaf24@11310 2548 // for superpage fl1's here, but this is *not* on the demand path,
kaf24@11310 2549 // so we'll hold off trying that for now...
kaf24@11310 2550 //
Tim@12603 2551 if ( !mfn_valid(sl1mfn) )
kaf24@11310 2552 sl1mfn = make_fl1_shadow(v, gl1gfn);
kaf24@11310 2553 #endif
kaf24@11310 2554 }
kaf24@11310 2555 else
kaf24@11310 2556 {
Tim@15863 2557 mfn_t gl1mfn = gfn_to_mfn(v->domain, gl1gfn, &p2mt);
Tim@15863 2558 if ( p2m_is_ram(p2mt) )
Tim@12561 2559 sl1mfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow);
kaf24@11310 2560 else
kaf24@11310 2561 result |= SHADOW_SET_ERROR;
kaf24@11310 2562 }
kaf24@11310 2563 }
Tim@16313 2564 l2e_propagate_from_guest(v, new_gl2e, sl1mfn, &new_sl2e, ft_prefetch);
tim@11666 2565
tim@11666 2566 // check for updates to xen reserved slots in PV guests...
tim@11666 2567 // XXX -- need to revisit this for PV 3-on-4 guests.
tim@11666 2568 //
tim@11666 2569 #if SHADOW_PAGING_LEVELS < 4
tim@11666 2570 #if CONFIG_PAGING_LEVELS == SHADOW_PAGING_LEVELS
tim@11666 2571 if ( !shadow_mode_external(v->domain) )
tim@11666 2572 {
tim@11666 2573 int shadow_index = (((unsigned long)sl2p & ~PAGE_MASK) /
tim@11666 2574 sizeof(shadow_l2e_t));
tim@11666 2575 int reserved_xen_slot;
tim@11666 2576
tim@11666 2577 #if SHADOW_PAGING_LEVELS == 3
tim@11666 2578 reserved_xen_slot =
Tim@12561 2579 ((mfn_to_shadow_page(sl2mfn)->type == SH_type_l2h_pae_shadow) &&
tim@11666 2580 (shadow_index
tim@11666 2581 >= (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1))));
tim@11666 2582 #else /* SHADOW_PAGING_LEVELS == 2 */
tim@11666 2583 reserved_xen_slot = (shadow_index >= L2_PAGETABLE_FIRST_XEN_SLOT);
tim@11666 2584 #endif
tim@11666 2585
tim@11666 2586 if ( unlikely(reserved_xen_slot) )
tim@11666 2587 {
tim@11666 2588 // attempt by the guest to write to a xen reserved slot
tim@11666 2589 //
tim@11666 2590 SHADOW_PRINTK("%s out-of-range update "
tim@11666 2591 "sl2mfn=%05lx index=0x%x val=%" SH_PRI_pte "\n",
tim@11666 2592 __func__, mfn_x(sl2mfn), shadow_index, new_sl2e.l2);
tim@11666 2593 if ( shadow_l2e_get_flags(new_sl2e) & _PAGE_PRESENT )
tim@11666 2594 {
tim@11666 2595 SHADOW_ERROR("out-of-range l2e update\n");
tim@11666 2596 result |= SHADOW_SET_ERROR;
tim@11666 2597 }
tim@11666 2598
tim@11666 2599 // do not call shadow_set_l2e...
tim@11666 2600 return result;
tim@11666 2601 }
tim@11666 2602 }
tim@11666 2603 #endif /* CONFIG_PAGING_LEVELS == SHADOW_PAGING_LEVELS */
tim@11666 2604 #endif /* SHADOW_PAGING_LEVELS < 4 */
tim@11666 2605
kaf24@11310 2606 result |= shadow_set_l2e(v, sl2p, new_sl2e, sl2mfn);
kaf24@11310 2607
kaf24@11310 2608 return result;
kaf24@11310 2609 }
kaf24@11310 2610
kaf24@11310 2611 static int validate_gl1e(struct vcpu *v, void *new_ge, mfn_t sl1mfn, void *se)
kaf24@11310 2612 {
kaf24@11310 2613 shadow_l1e_t new_sl1e;
Tim@16313 2614 guest_l1e_t new_gl1e = *(guest_l1e_t *)new_ge;
kaf24@11310 2615 shadow_l1e_t *sl1p = se;
kaf24@11310 2616 gfn_t gfn;
Tim@12069 2617 mfn_t gmfn;
Tim@15863 2618 p2m_type_t p2mt;
Tim@15863 2619 int result = 0;
keir@17905 2620 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
keir@17905 2621 mfn_t gl1mfn;
keir@17905 2622 #endif /* OOS */
kaf24@11310 2623
kfraser@14595 2624 perfc_incr(shadow_validate_gl1e_calls);
kaf24@11310 2625
Tim@16313 2626 gfn = guest_l1e_get_gfn(new_gl1e);
Tim@15863 2627 gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
Tim@15863 2628
Tim@16313 2629 l1e_propagate_from_guest(v, new_gl1e, gmfn, &new_sl1e, ft_prefetch, p2mt);
kaf24@11310 2630 result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
keir@17905 2631
keir@17905 2632 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
keir@17905 2633 gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
keir@17905 2634 if ( mfn_valid(gl1mfn)
keir@17905 2635 && mfn_is_out_of_sync(gl1mfn) )
keir@17905 2636 {
keir@17905 2637 /* Update the OOS snapshot. */
keir@17905 2638 mfn_t snpmfn = oos_snapshot_lookup(v, gl1mfn);
keir@17905 2639 guest_l1e_t *snp;
keir@17905 2640
keir@17905 2641 ASSERT(mfn_valid(snpmfn));
keir@17905 2642
keir@17905 2643 snp = sh_map_domain_page(snpmfn);
keir@17905 2644 snp[guest_index(new_ge)] = new_gl1e;
keir@17905 2645 sh_unmap_domain_page(snp);
keir@17905 2646 }
keir@17905 2647 #endif /* OOS */
keir@17905 2648
kaf24@11310 2649 return result;
kaf24@11310 2650 }
kaf24@11310 2651
keir@17903 2652 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
keir@17903 2653 /**************************************************************************/
keir@17903 2654 /* Special validation function for re-syncing out-of-sync shadows.
keir@17903 2655 * Walks the *shadow* page, and for every entry that it finds,
keir@17903 2656 * revalidates the guest entry that corresponds to it.
keir@17903 2657 * N.B. This function is called with the vcpu that unsynced the page,
keir@17903 2658 * *not* the one that is causing it to be resynced. */
keir@17905 2659 void sh_resync_l1(struct vcpu *v, mfn_t gl1mfn, mfn_t snpmfn)
keir@17903 2660 {
keir@17903 2661 mfn_t sl1mfn;
keir@17903 2662 shadow_l1e_t *sl1p;
keir@17905 2663 guest_l1e_t *gl1p, *gp, *snp;
keir@17903 2664 int rc = 0;
keir@17903 2665
keir@17905 2666 ASSERT(mfn_valid(snpmfn));
keir@17905 2667
keir@17905 2668 sl1mfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow);
keir@17903 2669 ASSERT(mfn_valid(sl1mfn)); /* Otherwise we would not have been called */
keir@17903 2670
keir@17905 2671 snp = sh_map_domain_page(snpmfn);
keir@17905 2672 gp = sh_map_domain_page(gl1mfn);
keir@17903 2673 gl1p = gp;
keir@17903 2674
keir@17905 2675 SHADOW_FOREACH_L1E(sl1mfn, sl1p, &gl1p, 0, {
keir@17905 2676 guest_l1e_t gl1e = *gl1p;
keir@17905 2677 guest_l1e_t *snpl1p = (guest_l1e_t *)snp + guest_index(gl1p);
keir@17905 2678
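        /* Only entries that differ from the snapshot (i.e. that the guest
         * has changed since the page went out of sync) need to be
         * re-propagated into the shadow. */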
keir@17905 2679 if ( memcmp(snpl1p, &gl1e, sizeof(gl1e)) )
keir@17905 2680 {
keir@17905 2681 gfn_t gfn;
keir@17905 2682 mfn_t gmfn;
keir@17905 2683 p2m_type_t p2mt;
keir@17905 2684 shadow_l1e_t nsl1e;
keir@17905 2685
keir@17905 2686 gfn = guest_l1e_get_gfn(gl1e);
keir@17905 2687 gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
keir@17905 2688 l1e_propagate_from_guest(v, gl1e, gmfn, &nsl1e, ft_prefetch, p2mt);
keir@17905 2689 rc |= shadow_set_l1e(v, sl1p, nsl1e, sl1mfn);
keir@17905 2690
keir@17905 2691 *snpl1p = gl1e;
keir@17905 2692 }
keir@17903 2693 });
keir@17903 2694
keir@17903 2695 sh_unmap_domain_page(gp);
keir@17905 2696 sh_unmap_domain_page(snp);
keir@17903 2697
keir@17903 2698 /* Setting shadow L1 entries should never need us to flush the TLB */
keir@17903 2699 ASSERT(!(rc & SHADOW_SET_FLUSH));
keir@17903 2700 }
keir@17903 2701
keir@17903 2702 /* Figure out whether it's definitely safe not to sync this l1 table.
keir@17903 2703 * That is: if we can tell that it's only used once, and that the
keir@17903 2704 * toplevel shadow responsible is not one of ours.
keir@17903 2705 * N.B. This function is called with the vcpu that required the resync,
keir@17903 2706 * *not* the one that originally unsynced the page, but it is
keir@17903 2707 * called in the *mode* of the vcpu that unsynced it. Clear? Good. */
keir@17903 2708 int sh_safe_not_to_sync(struct vcpu *v, mfn_t gl1mfn)
keir@17903 2709 {
keir@17903 2710 struct shadow_page_info *sp;
keir@17903 2711 mfn_t smfn;
keir@17903 2712
keir@17903 2713 smfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow);
keir@17903 2714 ASSERT(mfn_valid(smfn)); /* Otherwise we would not have been called */
keir@17903 2715
keir@17903 2716 /* Up to l2 */
keir@17903 2717 sp = mfn_to_shadow_page(smfn);
keir@17903 2718 if ( sp->count != 1 || !sp->up )
keir@17903 2719 return 0;
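    /* sp->up caches the physical address of the (single) shadow entry that
     * references this shadow, so shifting out the page offset below gives
     * the mfn of the shadow one level up (this relies on how the up-pointer
     * is maintained when references are taken). */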
keir@17903 2720 smfn = _mfn(sp->up >> PAGE_SHIFT);
keir@17903 2721 ASSERT(mfn_valid(smfn));
keir@17903 2722
keir@17903 2723 #if (SHADOW_PAGING_LEVELS == 4)
keir@17903 2724 /* up to l3 */
keir@17903 2725 sp = mfn_to_shadow_page(smfn);
keir@17903 2726 if ( sp->count != 1 || !sp->up )
keir@17903 2727 return 0;
keir@17903 2728 smfn = _mfn(sp->up >> PAGE_SHIFT);
keir@17903 2729 ASSERT(mfn_valid(smfn));
keir@17903 2730
keir@17903 2731 /* up to l4 */
keir@17903 2732 sp = mfn_to_shadow_page(smfn);
keir@17903 2733 if ( sp->count != 1
keir@17903 2734 || sh_type_is_pinnable(v, SH_type_l3_64_shadow) || !sp->up )
keir@17903 2735 return 0;
keir@17903 2736 smfn = _mfn(sp->up >> PAGE_SHIFT);
keir@17903 2737 ASSERT(mfn_valid(smfn));
keir@17903 2738
keir@17903 2739 #if (GUEST_PAGING_LEVELS == 2)
keir@17903 2740 /* In 2-on-3 shadow mode the up pointer contains the link to the
keir@17903 2741 * shadow page, but the shadow_table contains only the first of the
keir@17903 2742 * four pages that make up the PAE top shadow tables. */
keir@17903 2743 smfn = _mfn(mfn_x(smfn) & ~0x3UL);
keir@17903 2744 #endif
keir@17903 2745
keir@17903 2746 #endif
keir@17903 2747
keir@17903 2748 if ( pagetable_get_pfn(v->arch.shadow_table[0]) == mfn_x(smfn)
keir@17903 2749 #if (SHADOW_PAGING_LEVELS == 3)
keir@17903 2750 || pagetable_get_pfn(v->arch.shadow_table[1]) == mfn_x(smfn)
keir@17903 2751 || pagetable_get_pfn(v->arch.shadow_table[2]) == mfn_x(smfn)
keir@17903 2752 || pagetable_get_pfn(v->arch.shadow_table[3]) == mfn_x(smfn)
keir@17903 2753 #endif
keir@17903 2754 )
keir@17903 2755 return 0;
keir@17903 2756
keir@17903 2757 /* Only in use in one toplevel shadow, and it's not the one we're
keir@17903 2758 * running on */
keir@17903 2759 return 1;
keir@17903 2760 }
keir@17903 2761 #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */
keir@17903 2762
kaf24@11310 2763
kaf24@11310 2764 /**************************************************************************/
steven@11322 2765 /* Functions which translate and install the shadows of arbitrary guest
kaf24@11310 2766 * entries that we have just seen the guest write. */
kaf24@11310 2767
kaf24@11310 2768
kaf24@11310 2769 static inline int
kaf24@11310 2770 sh_map_and_validate(struct vcpu *v, mfn_t gmfn,
kaf24@11310 2771 void *new_gp, u32 size, u32 sh_type,
kaf24@11310 2772 u32 (*shadow_index)(mfn_t *smfn, u32 idx),
kaf24@11310 2773 int (*validate_ge)(struct vcpu *v, void *ge,
kaf24@11310 2774 mfn_t smfn, void *se))
kaf24@11310 2775 /* Generic function for mapping and validating. */
kaf24@11310 2776 {
kaf24@11310 2777 mfn_t smfn, smfn2, map_mfn;
kaf24@11310 2778 shadow_l1e_t *sl1p;
kaf24@11310 2779 u32 shadow_idx, guest_idx;
kaf24@11310 2780 int result = 0;
kaf24@11310 2781
kaf24@11310 2782 /* Align address and size to guest entry boundaries */
kaf24@11310 2783 size += (unsigned long)new_gp & (sizeof (guest_l1e_t) - 1);
kaf24@11310 2784 new_gp = (void *)((unsigned long)new_gp & ~(sizeof (guest_l1e_t) - 1));
kaf24@11310 2785 size = (size + sizeof (guest_l1e_t) - 1) & ~(sizeof (guest_l1e_t) - 1);
kaf24@11310 2786 ASSERT(size + (((unsigned long)new_gp) & ~PAGE_MASK) <= PAGE_SIZE);
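    /* Worked example: with 8-byte guest entries, a 4-byte write at page
     * offset 0x103 becomes size = 4 + 3 = 7, new_gp is rounded down to
     * offset 0x100, and size is rounded up to 8 -- i.e. exactly the one
     * guest entry that the write touched. */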
kaf24@11310 2787
kaf24@11310 2788 /* Map the shadow page */
kaf24@11310 2789 smfn = get_shadow_status(v, gmfn, sh_type);
Tim@12603 2790 ASSERT(mfn_valid(smfn)); /* Otherwise we would not have been called */
kaf24@11310 2791 guest_idx = guest_index(new_gp);
kaf24@11310 2792 map_mfn = smfn;
kaf24@11310 2793 shadow_idx = shadow_index(&map_mfn, guest_idx);
keir@17620 2794 sl1p = sh_map_domain_page(map_mfn);
kaf24@11310 2795
kaf24@11310 2796 /* Validate one entry at a time */
kaf24@11310 2797 while ( size )
kaf24@11310 2798 {
kaf24@11310 2799 smfn2 = smfn;
kaf24@11310 2800 guest_idx = guest_index(new_gp);
kaf24@11310 2801 shadow_idx = shadow_index(&smfn2, guest_idx);
kaf24@11310 2802 if ( mfn_x(smfn2) != mfn_x(map_mfn) )
kaf24@11310 2803 {
kaf24@11310 2804 /* We have moved to another page of the shadow */
kaf24@11310 2805 map_mfn = smfn2;
keir@17620 2806 sh_unmap_domain_page(sl1p);
keir@17620 2807 sl1p = sh_map_domain_page(map_mfn);
kaf24@11310 2808 }
kaf24@11310 2809 result |= validate_ge(v,
kaf24@11310 2810 new_gp,
kaf24@11310 2811 map_mfn,
kaf24@11310 2812 &sl1p[shadow_idx]);
kaf24@11310 2813 size -= sizeof(guest_l1e_t);
kaf24@11310 2814 new_gp += sizeof(guest_l1e_t);
kaf24@11310 2815 }
keir@17620 2816 sh_unmap_domain_page(sl1p);
kaf24@11310 2817 return result;
kaf24@11310 2818 }
kaf24@11310 2819
kaf24@11310 2820
kaf24@11310 2821 int
kaf24@11310 2822 sh_map_and_validate_gl4e(struct vcpu *v, mfn_t gl4mfn,
kaf24@11310 2823 void *new_gl4p, u32 size)
kaf24@11310 2824 {
kaf24@11310 2825 #if GUEST_PAGING_LEVELS >= 4
kaf24@11310 2826 return sh_map_and_validate(v, gl4mfn, new_gl4p, size,
Tim@12561 2827 SH_type_l4_shadow,
kaf24@11310 2828 shadow_l4_index,
kaf24@11310 2829 validate_gl4e);
kaf24@11310 2830 #else // ! GUEST_PAGING_LEVELS >= 4
keir@16090 2831 SHADOW_ERROR("called in wrong paging mode!\n");
kaf24@11310 2832 BUG();
kaf24@11310 2833 return 0;
kaf24@11310 2834 #endif
kaf24@11310 2835 }
kaf24@11310 2836
kaf24@11310 2837 int
kaf24@11310 2838 sh_map_and_validate_gl3e(struct vcpu *v, mfn_t gl3mfn,
kaf24@11310 2839 void *new_gl3p, u32 size)
kaf24@11310 2840 {
Tim@11867 2841 #if GUEST_PAGING_LEVELS >= 4
kaf24@11310 2842 return sh_map_and_validate(v, gl3mfn, new_gl3p, size,
Tim@12561 2843 SH_type_l3_shadow,
kaf24@11310 2844 shadow_l3_index,
kaf24@11310 2845 validate_gl3e);
Tim@11867 2846 #else // ! GUEST_PAGING_LEVELS >= 4
keir@16090 2847 SHADOW_ERROR("called in wrong paging mode!\n");
kaf24@11310 2848 BUG();
kaf24@11310 2849 return 0;
kaf24@11310 2850 #endif
kaf24@11310 2851 }
kaf24@11310 2852
kaf24@11310 2853 int
kaf24@11310 2854 sh_map_and_validate_gl2e(struct vcpu *v, mfn_t gl2mfn,
kaf24@11310 2855 void *new_gl2p, u32 size)
kaf24@11310 2856 {
kaf24@11310 2857 return sh_map_and_validate(v, gl2mfn, new_gl2p, size,
Tim@12561 2858 SH_type_l2_shadow,
kaf24@11310 2859 shadow_l2_index,
kaf24@11310 2860 validate_gl2e);
kaf24@11310 2861 }
kaf24@11310 2862
kaf24@11310 2863 int
kaf24@11310 2864 sh_map_and_validate_gl2he(struct vcpu *v, mfn_t gl2mfn,
kaf24@11310 2865 void *new_gl2p, u32 size)
kaf24@11310 2866 {
ack@14013 2867 #if GUEST_PAGING_LEVELS >= 3
kaf24@11310 2868 return sh_map_and_validate(v, gl2mfn, new_gl2p, size,
Tim@12561 2869 SH_type_l2h_shadow,
kaf24@11310 2870 shadow_l2_index,
kaf24@11310 2871 validate_gl2e);
kaf24@11310 2872 #else /* Non-PAE guests don't have different kinds of l2 table */
keir@16090 2873 SHADOW_ERROR("called in wrong paging mode!\n");
kaf24@11310 2874 BUG();
kaf24@11310 2875 return 0;
kaf24@11310 2876 #endif
kaf24@11310 2877 }
kaf24@11310 2878
kaf24@11310 2879 int
kaf24@11310 2880 sh_map_and_validate_gl1e(struct vcpu *v, mfn_t gl1mfn,
kaf24@11310 2881 void *new_gl1p, u32 size)
kaf24@11310 2882 {
kaf24@11310 2883 return sh_map_and_validate(v, gl1mfn, new_gl1p, size,
Tim@12561 2884 SH_type_l1_shadow,
kaf24@11310 2885 shadow_l1_index,
kaf24@11310 2886 validate_gl1e);
kaf24@11310 2887 }
kaf24@11310 2888
kaf24@11310 2889
kaf24@11310 2890 /**************************************************************************/
kaf24@11310 2891 /* Optimization: If we see two emulated writes of zeros to the same
kaf24@11310 2892 * page-table without another kind of page fault in between, we guess
kaf24@11310 2893 * that this is a batch of changes (for process destruction) and
kaf24@11310 2894 * unshadow the page so we don't take a pagefault on every entry. This
kaf24@11310 2895 * should also make finding writeable mappings of pagetables much
kaf24@11310 2896 * easier. */
kaf24@11310 2897
kaf24@11310 2898 /* Look to see if this is the second emulated write in a row to this
keir@16976 2899 * page, and unshadow if it is */
kaf24@11310 2900 static inline void check_for_early_unshadow(struct vcpu *v, mfn_t gmfn)
kaf24@11310 2901 {
kaf24@11310 2902 #if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
keir@17080 2903 if ( v->arch.paging.shadow.last_emulated_mfn_for_unshadow == mfn_x(gmfn)
keir@17080 2904 && sh_mfn_is_a_page_table(gmfn) )
kaf24@11310 2905 {
keir@16976 2906 perfc_incr(shadow_early_unshadow);
keir@16976 2907 sh_remove_shadows(v, gmfn, 1, 0 /* Fast, can fail to unshadow */ );
keir@18454 2908 TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EARLY_UNSHADOW);
kaf24@11310 2909 }
keir@17080 2910 v->arch.paging.shadow.last_emulated_mfn_for_unshadow = mfn_x(gmfn);
kaf24@11310 2911 #endif
kaf24@11310 2912 }
kaf24@11310 2913
kaf24@11310 2914 /* Stop counting towards early unshadows, as we've seen a real page fault */
kaf24@11310 2915 static inline void reset_early_unshadow(struct vcpu *v)
kaf24@11310 2916 {
kaf24@11310 2917 #if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
keir@17080 2918 v->arch.paging.shadow.last_emulated_mfn_for_unshadow = INVALID_MFN;
kaf24@11310 2919 #endif
kaf24@11310 2920 }
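
/* Illustrative pairing of the two helpers above (a sketch only -- the real
 * call sites are in the emulation and fault-handling paths later in this
 * file; "wrote_zeros" is a stand-in for the real condition): */
#if 0
    if ( wrote_zeros )
        check_for_early_unshadow(v, gmfn); /* 2nd zero write in a row unshadows */
    else
        reset_early_unshadow(v);           /* anything else restarts the count */
#endif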
kaf24@11310 2921
kaf24@11310 2922
kaf24@11310 2923
kaf24@11310 2924 /**************************************************************************/
Tim@12069 2925 /* Optimization: Prefetch multiple L1 entries. This is called after we have
Tim@12069 2926 * demand-faulted a shadow l1e in the fault handler, to see if it's
Tim@12069 2927 * worth fetching some more.
Tim@12069 2928 */
Tim@12069 2929
Tim@12069 2930 #if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH
Tim@12069 2931
Tim@12069 2932 /* XXX magic number */
Tim@12069 2933 #define PREFETCH_DISTANCE 32
Tim@12069 2934
Tim@12069 2935 static void sh_prefetch(struct vcpu *v, walk_t *gw,
Tim@12069 2936 shadow_l1e_t *ptr_sl1e, mfn_t sl1mfn)
Tim@12069 2937 {
Tim@15863 2938 int i, dist;
Tim@12069 2939 gfn_t gfn;
Tim@12069 2940 mfn_t gmfn;
Tim@16313 2941 guest_l1e_t *gl1p = NULL, gl1e;
Tim@12069 2942 shadow_l1e_t sl1e;
Tim@12069 2943 u32 gflags;
Tim@15863 2944 p2m_type_t p2mt;
keir@17905 2945 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
keir@17905 2946 guest_l1e_t *snpl1p = NULL;
keir@17905 2947 #endif /* OOS */
keir@17905 2948
Tim@12069 2949
Tim@12069 2950 /* Prefetch no further than the end of the _shadow_ l1 MFN */
Tim@12069 2951 dist = (PAGE_SIZE - ((unsigned long)ptr_sl1e & ~PAGE_MASK)) / sizeof sl1e;
Tim@12069 2952 /* And no more than a maximum fetches-per-fault */
Tim@12069 2953 if ( dist > PREFETCH_DISTANCE )
Tim@12069 2954 dist = PREFETCH_DISTANCE;
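    /* Worked example: with 8-byte shadow entries, a fault on the entry at
     * page offset 0xf80 gives dist = (0x1000 - 0xf80) / 8 = 16, i.e. the
     * faulting entry plus the 15 after it; a fault near the start of the
     * page is instead capped at PREFETCH_DISTANCE (32). */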
Tim@12069 2955
Tim@16313 2956 if ( mfn_valid(gw->l1mfn) )
Tim@16313 2957 {
Tim@16313 2958 /* Normal guest page; grab the next guest entry */
Tim@16313 2959 gl1p = sh_map_domain_page(gw->l1mfn);
Tim@16313 2960 gl1p += guest_l1_table_offset(gw->va);
keir@17905 2961
keir@17905 2962 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
keir@17905 2963 if ( mfn_is_out_of_sync(gw->l1mfn) )
keir@17905 2964 {
keir@17905 2965 mfn_t snpmfn = oos_snapshot_lookup(v, gw->l1mfn);
keir@17905 2966
keir@17905 2967 ASSERT(mfn_valid(snpmfn));
keir@17905 2968 snpl1p = sh_map_domain_page(snpmfn);
keir@17905 2969 snpl1p += guest_l1_table_offset(gw->va);
keir@17905 2970 }
keir@17905 2971 #endif /* OOS */
Tim@16313 2972 }
Tim@16313 2973
Tim@12069 2974 for ( i = 1; i < dist ; i++ )
Tim@12069 2975 {
Tim@12069 2976 /* No point in prefetching if there's already a shadow */
Tim@12069 2977 if ( ptr_sl1e[i].l1 != 0 )
Tim@12069 2978 break;
Tim@12069 2979
Tim@16313 2980 if ( mfn_valid(gw->l1mfn) )
Tim@12069 2981 {
Tim@12069 2982 /* Normal guest page; grab the next guest entry */
Tim@16313 2983 gl1e = gl1p[i];
Tim@12069 2984 /* Not worth continuing if we hit an entry that will need another
Tim@12069 2985 * fault for A/D-bit propagation anyway */
Tim@12069 2986 gflags = guest_l1e_get_flags(gl1e);
Tim@12069 2987 if ( (gflags & _PAGE_PRESENT)
Tim@12069 2988 && (!(gflags & _PAGE_ACCESSED)
Tim@12069 2989 || ((gflags & _PAGE_RW) && !(gflags & _PAGE_DIRTY))) )
Tim@12069 2990 break;
Tim@12069 2991 }
Tim@12069 2992 else
Tim@12069 2993 {
Tim@12069 2994 /* Fragmented superpage, unless we've been called wrongly */
Tim@16313 2995 ASSERT(guest_l2e_get_flags(gw->l2e) & _PAGE_PSE);
Tim@12069 2996 /* Increment the l1e's GFN by the right number of guest pages */
Tim@12069 2997 gl1e = guest_l1e_from_gfn(
Tim@16313 2998 _gfn(gfn_x(guest_l1e_get_gfn(gw->l1e)) + i),
Tim@16313 2999 guest_l1e_get_flags(gw->l1e));
Tim@12069 3000 }
Tim@12069 3001
Tim@12069 3002 /* Look at the gfn that the l1e is pointing at */
Tim@12069 3003 gfn = guest_l1e_get_gfn(gl1e);
Tim@15863 3004 gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
Tim@12069 3005
Tim@16313 3006 /* Propagate the entry. */
Tim@16313 3007 l1e_propagate_from_guest(v, gl1e, gmfn, &sl1e, ft_prefetch, p2mt);
Tim@12069 3008 (void) shadow_set_l1e(v, ptr_sl1e + i, sl1e, sl1mfn);
keir@17905 3009
keir@17905 3010 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
keir@17905 3011 if ( snpl1p != NULL )
keir@17905 3012 snpl1p[i] = gl1e;
keir@17905 3013 #endif /* OOS */
Tim@12069 3014 }
Tim@16313 3015 if ( gl1p != NULL )
Tim@16313 3016 sh_unmap_domain_page(gl1p);
keir@17905 3017 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
keir@17905 3018 if ( snpl1p != NULL )
keir@17905 3019 sh_unmap_domain_page(snpl1p);
keir@17905 3020 #endif /* OOS */
Tim@12069 3021 }
Tim@12069 3022
Tim@12069 3023 #endif /* SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH */
Tim@12069 3024
keir@18454 3025 #if GUEST_PAGING_LEVELS == 4
keir@18454 3026 typedef u64 guest_va_t;
keir@18454 3027 typedef u64 guest_pa_t;
keir@18454 3028 #elif GUEST_PAGING_LEVELS == 3
keir@18454 3029 typedef u32 guest_va_t;
keir@18454 3030 typedef u64 guest_pa_t;
keir@18454 3031 #else
keir@18454 3032 typedef u32 guest_va_t;
keir@18454 3033 typedef u32 guest_pa_t;
keir@18454 3034 #endif
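/* PAE guests have 32-bit virtual addresses but physical addresses wider
 * than 32 bits, hence the u32 va / u64 pa split above; 4-level guests use
 * 64 bits for both, and 2-level guests 32 bits for both. */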
keir@18454 3035
keir@18454 3036 static inline void trace_shadow_gen(u32 event, guest_va_t va)
keir@18454 3037 {
keir@18454 3038 if ( tb_init_done )
keir@18454 3039 {
keir@18454 3040 event |= (GUEST_PAGING_LEVELS-2)<<8;
keir@18454 3041 __trace_var(event, 0/*!tsc*/, sizeof(va), (unsigned char*)&va);
keir@18454 3042 }
keir@18454 3043 }
keir@18454 3044
keir@18454 3045 static inline void trace_shadow_fixup(guest_l1e_t gl1e,
keir@18454 3046 guest_va_t va)
keir@18454 3047 {
keir@18454 3048 if ( tb_init_done )
keir@18454 3049 {
keir@18454 3050 struct {
keir@18454 3051 /* for PAE, guest_l1e may be 64 bits while guest_va may be 32;
keir@18454 3052 so put it first for alignment's sake. */
keir@18454 3053 guest_l1e_t gl1e;
keir@18454 3054 guest_va_t va;
keir@18454 3055 u32 flags;
keir@18454 3056 } __attribute__((packed)) d;
keir@18454 3057 u32 event;
keir@18454 3058
keir@18454 3059 event = TRC_SHADOW_FIXUP | ((GUEST_PAGING_LEVELS-2)<<8);
keir@18454 3060
keir@18454 3061 d.gl1e = gl1e;
keir@18454 3062 d.va = va;
keir@18454 3063 d.flags = this_cpu(trace_shadow_path_flags);
keir@18454 3064
keir@18454 3065 __trace_var(event, 0/*!tsc*/, sizeof(d), (unsigned char*)&d);
keir@18454 3066 }
keir@18454 3067 }
keir@18454 3068
keir@18454 3069 static inline void trace_not_shadow_fault(guest_l1e_t gl1e,
keir@18454 3070 guest_va_t va)
keir@18454 3071 {
keir@18454 3072 if ( tb_init_done )
keir@18454 3073 {
keir@18454 3074 struct {
keir@18454 3075 /* for PAE, guest_l1e may be 64 bits while guest_va may be 32;
keir@18454 3076 so put it first for alignment's sake. */
keir@18454 3077 guest_l1e_t gl1e;
keir@18454 3078 guest_va_t va;
keir@18454 3079 u32 flags;
keir@18454 3080 } __attribute__((packed)) d;
keir@18454 3081 u32 event;
keir@18454 3082
keir@18454 3083 event = TRC_SHADOW_NOT_SHADOW | ((GUEST_PAGING_LEVELS-2)<<8);
keir@18454 3084
keir@18454 3085 d.gl1e = gl1e;
keir@18454 3086 d.va = va;
keir@18454 3087 d.flags = this_cpu(trace_shadow_path_flags);