ia64/xen-unstable

changeset 17903:597058a3b619

Out-of-sync L1 shadows: OOS base

This patch implements the basic mechanisms for letting shadowed L1
pagetables go out of sync with their shadows and for bringing them back
into sync again.
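
The core bookkeeping is a small per-vcpu, open-addressed hash of
out-of-sync guest MFNs with a "second chance" on insertion (see
oos_hash_add() in shadow/common.c below).  The following standalone
sketch, which is not part of the patch, mimics that insert/evict policy
with plain arrays; SHADOW_OOS_PAGES is an arbitrary stand-in value here
and resync() merely stands in for _sh_resync():

    /* Illustration only: two candidate slots per MFN.  An occupant
     * sitting in its home slot is punted to the next slot; whatever
     * occupies that slot is evicted by resyncing it. */
    #include <stdio.h>

    #define SHADOW_OOS_PAGES 3              /* stand-in size */
    #define INVALID_MFN      (~0UL)

    static unsigned long oos[SHADOW_OOS_PAGES] = {
        INVALID_MFN, INVALID_MFN, INVALID_MFN };

    static void resync(unsigned long mfn)   /* stands in for _sh_resync() */
    {
        printf("evict: resync mfn %#lx\n", mfn);
    }

    static void oos_hash_add(unsigned long gmfn)
    {
        int idx = gmfn % SHADOW_OOS_PAGES;

        /* Punt an occupant that hashes here into the next slot. */
        if ( oos[idx] != INVALID_MFN
             && (oos[idx] % SHADOW_OOS_PAGES) == idx )
        {
            unsigned long tmp = oos[idx];
            oos[idx] = gmfn;
            gmfn = tmp;
            idx = (idx + 1) % SHADOW_OOS_PAGES;
        }
        /* Crush whatever still occupies the second slot. */
        if ( oos[idx] != INVALID_MFN )
            resync(oos[idx]);
        oos[idx] = gmfn;
    }

    int main(void)
    {
        unsigned long gmfns[] = { 0x100, 0x103, 0x106, 0x101 };
        for ( int i = 0; i < 4; i++ )
            oos_hash_add(gmfns[i]);
        for ( int i = 0; i < SHADOW_OOS_PAGES; i++ )
            printf("slot %d: %#lx\n", i, oos[i]);
        return 0;
    }

The real patch keeps the faulting virtual address alongside each MFN
(oos_va[]) as a hint for finding and revoking the writable mapping when
the page is eventually brought back into sync.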

Signed-off-by: Gianluca Guida <gianluca.guida@eu.citrix.com>
Signed-off-by: Tim Deegan <tim.deegan@eu.citrix.com>
Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 20 18:39:45 2008 +0100 (2008-06-20)
parents 6ace85eb96c0
children f178082cce0a
files xen/arch/x86/mm.c xen/arch/x86/mm/shadow/common.c xen/arch/x86/mm/shadow/multi.c xen/arch/x86/mm/shadow/multi.h xen/arch/x86/mm/shadow/private.h xen/arch/x86/mm/shadow/types.h xen/include/asm-x86/domain.h xen/include/asm-x86/mm.h xen/include/asm-x86/perfc_defn.h
line diff
     1.1 --- a/xen/arch/x86/mm.c	Fri Jun 20 18:37:29 2008 +0100
     1.2 +++ b/xen/arch/x86/mm.c	Fri Jun 20 18:39:45 2008 +0100
     1.3 @@ -1933,9 +1933,15 @@ int get_page_type(struct page_info *page
     1.4          {
     1.5              struct domain *d = page_get_owner(page);
     1.6  
     1.7 -            /* Never allow a shadowed frame to go from type count 0 to 1 */
     1.8 -            if ( d && shadow_mode_enabled(d) )
     1.9 -                shadow_remove_all_shadows(d->vcpu[0], _mfn(page_to_mfn(page)));
    1.10 +            /* Normally we should never let a page go from type count 0
    1.11 +             * to type count 1 when it is shadowed. One exception:
    1.12 +             * out-of-sync shadowed pages are allowed to become
    1.13 +             * writeable. */
    1.14 +            if ( d && shadow_mode_enabled(d)
    1.15 +                 && (page->count_info & PGC_page_table)
    1.16 +                 && !((page->shadow_flags & (1u<<29))
    1.17 +                      && type == PGT_writable_page) )
    1.18 +               shadow_remove_all_shadows(d->vcpu[0], _mfn(page_to_mfn(page)));
    1.19  
    1.20              ASSERT(!(x & PGT_pae_xen_l2));
    1.21              if ( (x & PGT_type_mask) != type )
     2.1 --- a/xen/arch/x86/mm/shadow/common.c	Fri Jun 20 18:37:29 2008 +0100
     2.2 +++ b/xen/arch/x86/mm/shadow/common.c	Fri Jun 20 18:39:45 2008 +0100
     2.3 @@ -54,6 +54,10 @@ void shadow_domain_init(struct domain *d
     2.4      /* Use shadow pagetables for log-dirty support */
     2.5      paging_log_dirty_init(d, shadow_enable_log_dirty, 
     2.6                            shadow_disable_log_dirty, shadow_clean_dirty_bitmap);
     2.7 +
     2.8 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
     2.9 +    d->arch.paging.shadow.oos_active = 0;
    2.10 +#endif
    2.11  }
    2.12  
    2.13  /* Setup the shadow-specfic parts of a vcpu struct. Note: The most important
    2.14 @@ -64,6 +68,13 @@ void shadow_domain_init(struct domain *d
    2.15   */
    2.16  void shadow_vcpu_init(struct vcpu *v)
    2.17  {
    2.18 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
    2.19 +    int i;
    2.20 +
    2.21 +    for ( i = 0; i < SHADOW_OOS_PAGES; i++ )
    2.22 +        v->arch.paging.shadow.oos[i] = _mfn(INVALID_MFN);
    2.23 +#endif
    2.24 +
    2.25      v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode, 3);
    2.26  }
    2.27  
    2.28 @@ -427,6 +438,404 @@ void shadow_continue_emulation(struct sh
    2.29          }
    2.30      }
    2.31  }
    2.32 + 
    2.33 +
    2.34 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
    2.35 +/**************************************************************************/
    2.36 +/* Out-of-sync shadows. */ 
    2.37 +
    2.38 +/* From time to time, we let a shadowed pagetable page go out of sync 
    2.39 + * with its shadow: the guest is allowed to write directly to the page, 
    2.40 + * and those writes are not synchronously reflected in the shadow.
    2.41 + * This lets us avoid many emulations if the guest is writing a lot to a 
    2.42 + * pagetable, but it relaxes a pretty important invariant in the shadow 
    2.43 + * pagetable design.  Therefore, some rules:
    2.44 + *
    2.45 + * 1. Only L1 pagetables may go out of sync: any page that is shadowed
     2.46 + *    at a higher level must be synchronously updated.  This makes
    2.47 + *    using linear shadow pagetables much less dangerous.
    2.48 + *    That means that: (a) unsyncing code needs to check for higher-level
    2.49 + *    shadows, and (b) promotion code needs to resync.
    2.50 + * 
    2.51 + * 2. All shadow operations on a guest page require the page to be brought
    2.52 + *    back into sync before proceeding.  This must be done under the
    2.53 + *    shadow lock so that the page is guaranteed to remain synced until
    2.54 + *    the operation completes.
    2.55 + *
    2.56 + *    Exceptions to this rule: the pagefault and invlpg handlers may 
    2.57 + *    update only one entry on an out-of-sync page without resyncing it. 
    2.58 + *
    2.59 + * 3. Operations on shadows that do not start from a guest page need to
    2.60 + *    be aware that they may be handling an out-of-sync shadow.
    2.61 + *
    2.62 + * 4. Operations that do not normally take the shadow lock (fast-path 
    2.63 + *    #PF handler, INVLPG) must fall back to a locking, syncing version 
    2.64 + *    if they see an out-of-sync table. 
    2.65 + *
    2.66 + * 5. Operations corresponding to guest TLB flushes (MOV CR3, INVLPG)
    2.67 + *    must explicitly resync all relevant pages or update their
    2.68 + *    shadows.
    2.69 + *
    2.70 + * Currently out-of-sync pages are listed in a simple open-addressed
    2.71 + * hash table with a second chance (must resist temptation to radically
    2.72 + * over-engineer hash tables...)  The virtual address of the access
    2.73 + * which caused us to unsync the page is also kept in the hash table, as
    2.74 + * a hint for finding the writable mappings later.
    2.75 + *
    2.76 + * We keep a hash per vcpu, because we want as much as possible to do
     2.77 + * the re-sync on the same vcpu we did the unsync on, so the VA hint
    2.78 + * will be valid.
    2.79 + */
    2.80 +
    2.81 +
    2.82 +#if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_FULL
    2.83 +static void sh_oos_audit(struct domain *d) 
    2.84 +{
    2.85 +    int idx, expected_idx, expected_idx_alt;
    2.86 +    struct page_info *pg;
    2.87 +    struct vcpu *v;
    2.88 +    
    2.89 +    for_each_vcpu(d, v) 
    2.90 +    {
    2.91 +        for ( idx = 0; idx < SHADOW_OOS_PAGES; idx++ )
    2.92 +        {
    2.93 +            mfn_t *oos = v->arch.paging.shadow.oos;
    2.94 +            if ( !mfn_valid(oos[idx]) )
    2.95 +                continue;
    2.96 +            
    2.97 +            expected_idx = mfn_x(oos[idx]) % SHADOW_OOS_PAGES;
    2.98 +            expected_idx_alt = ((expected_idx + 1) % SHADOW_OOS_PAGES);
    2.99 +            if ( idx != expected_idx && idx != expected_idx_alt )
   2.100 +            {
   2.101 +                printk("%s: idx %d contains gmfn %lx, expected at %d or %d.\n",
   2.102 +                       __func__, idx, mfn_x(oos[idx]), 
   2.103 +                       expected_idx, expected_idx_alt);
   2.104 +                BUG();
   2.105 +            }
   2.106 +            pg = mfn_to_page(oos[idx]);
   2.107 +            if ( !(pg->count_info & PGC_page_table) )
   2.108 +            {
   2.109 +                printk("%s: idx %x gmfn %lx not a pt (count %"PRIx32")\n",
   2.110 +                       __func__, idx, mfn_x(oos[idx]), pg->count_info);
   2.111 +                BUG();
   2.112 +            }
   2.113 +            if ( !(pg->shadow_flags & SHF_out_of_sync) )
   2.114 +            {
   2.115 +                printk("%s: idx %x gmfn %lx not marked oos (flags %lx)\n",
   2.116 +                       __func__, idx, mfn_x(oos[idx]), pg->shadow_flags);
   2.117 +                BUG();
   2.118 +            }
   2.119 +            if ( (pg->shadow_flags & SHF_page_type_mask & ~SHF_L1_ANY) )
   2.120 +            {
   2.121 +                printk("%s: idx %x gmfn %lx shadowed as non-l1 (flags %lx)\n",
   2.122 +                       __func__, idx, mfn_x(oos[idx]), pg->shadow_flags);
   2.123 +                BUG();
   2.124 +            }
   2.125 +        }
   2.126 +    }
   2.127 +}
   2.128 +#endif
   2.129 +
   2.130 +#if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES
   2.131 +void oos_audit_hash_is_present(struct domain *d, mfn_t gmfn) 
   2.132 +{
   2.133 +    int idx;
   2.134 +    struct vcpu *v;
   2.135 +    mfn_t *oos;
   2.136 +
   2.137 +    ASSERT(mfn_is_out_of_sync(gmfn));
   2.138 +    
   2.139 +    for_each_vcpu(d, v) 
   2.140 +    {
   2.141 +        oos = v->arch.paging.shadow.oos;
   2.142 +        idx = mfn_x(gmfn) % SHADOW_OOS_PAGES;
   2.143 +        if ( mfn_x(oos[idx]) != mfn_x(gmfn) )
   2.144 +            idx = (idx + 1) % SHADOW_OOS_PAGES;
   2.145 +        
   2.146 +        if ( mfn_x(oos[idx]) == mfn_x(gmfn) )
   2.147 +            return;
   2.148 +    }
   2.149 +
   2.150 +    SHADOW_ERROR("gmfn %lx marked OOS but not in hash table\n", mfn_x(gmfn));
   2.151 +    BUG();
   2.152 +}
   2.153 +#endif
   2.154 +
   2.155 +/* Update the shadow, but keep the page out of sync. */
   2.156 +static inline void _sh_resync_l1(struct vcpu *v, mfn_t gmfn)
   2.157 +{
   2.158 +    struct page_info *pg = mfn_to_page(gmfn);
   2.159 +
   2.160 +    ASSERT(mfn_valid(gmfn));
   2.161 +    ASSERT(page_is_out_of_sync(pg));
   2.162 +
   2.163 +    /* Call out to the appropriate per-mode resyncing function */
   2.164 +    if ( pg->shadow_flags & SHF_L1_32 )
   2.165 +        SHADOW_INTERNAL_NAME(sh_resync_l1, 2)(v, gmfn);
   2.166 +    else if ( pg->shadow_flags & SHF_L1_PAE )
   2.167 +        SHADOW_INTERNAL_NAME(sh_resync_l1, 3)(v, gmfn);
   2.168 +#if CONFIG_PAGING_LEVELS >= 4
   2.169 +    else if ( pg->shadow_flags & SHF_L1_64 )
   2.170 +        SHADOW_INTERNAL_NAME(sh_resync_l1, 4)(v, gmfn);
   2.171 +#endif
   2.172 +}
   2.173 +
   2.174 +/* Pull all the entries on an out-of-sync page back into sync. */
   2.175 +static void _sh_resync(struct vcpu *v, mfn_t gmfn, unsigned long va)
   2.176 +{
   2.177 +    struct page_info *pg = mfn_to_page(gmfn);
   2.178 +
   2.179 +    ASSERT(shadow_locked_by_me(v->domain));
   2.180 +    ASSERT(mfn_is_out_of_sync(gmfn));
   2.181 +    /* Guest page must be shadowed *only* as L1 when out of sync. */
   2.182 +    ASSERT(!(mfn_to_page(gmfn)->shadow_flags & SHF_page_type_mask 
   2.183 +             & ~SHF_L1_ANY));
   2.184 +    ASSERT(!sh_page_has_multiple_shadows(mfn_to_page(gmfn)));
   2.185 +
   2.186 +    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, va=%lx\n",
   2.187 +                  v->domain->domain_id, v->vcpu_id, mfn_x(gmfn), va);
   2.188 +
   2.189 +    /* Need to pull write access so the page *stays* in sync. 
   2.190 +     * This might be rather slow but we hope that in the common case 
   2.191 +     * we're handling this pagetable after a guest walk has pulled 
   2.192 +     * write access the fast way. */
   2.193 +    switch ( sh_remove_write_access(v, gmfn, 0, va) )
   2.194 +    {
   2.195 +    default:
   2.196 +    case 0:
   2.197 +        break;
   2.198 +
   2.199 +    case 1:
   2.200 +        flush_tlb_mask(v->domain->domain_dirty_cpumask);
   2.201 +        break;
   2.202 +
   2.203 +    case -1:
   2.204 +        /* An unfindable writeable typecount has appeared, probably via a
   2.205 +         * grant table entry: can't shoot the mapping, so try to unshadow 
   2.206 +         * the page.  If that doesn't work either, the guest is granting
   2.207 +         * his pagetables and must be killed after all. */
   2.208 +        sh_remove_shadows(v, gmfn, 0 /* Be thorough */, 1 /* Must succeed */);
   2.209 +        return;
   2.210 +    }
   2.211 +
   2.212 +    /* No more writable mappings of this page, please */
   2.213 +    pg->shadow_flags &= ~SHF_oos_may_write;
   2.214 +
   2.215 +    /* Update the shadows with current guest entries. */
   2.216 +    _sh_resync_l1(v, gmfn);
   2.217 +
   2.218 +    /* Now we know all the entries are synced, and will stay that way */
   2.219 +    pg->shadow_flags &= ~SHF_out_of_sync;
   2.220 +    perfc_incr(shadow_resync);
   2.221 +}
   2.222 +
   2.223 +
   2.224 +/* Add an MFN to the list of out-of-sync guest pagetables */
   2.225 +static void oos_hash_add(struct vcpu *v, mfn_t gmfn, unsigned long va)
   2.226 +{
   2.227 +    int idx;
   2.228 +    mfn_t *oos = v->arch.paging.shadow.oos;
   2.229 +    unsigned long *oos_va = v->arch.paging.shadow.oos_va;
   2.230 +
   2.231 +    idx = mfn_x(gmfn) % SHADOW_OOS_PAGES;
   2.232 +    if ( mfn_valid(oos[idx]) 
   2.233 +         && (mfn_x(oos[idx]) % SHADOW_OOS_PAGES) == idx )
   2.234 +    {
   2.235 +        /* Punt the current occupant into the next slot */
   2.236 +        SWAP(oos[idx], gmfn);
   2.237 +        SWAP(oos_va[idx], va);
   2.238 +        idx = (idx + 1) % SHADOW_OOS_PAGES;
   2.239 +    }
   2.240 +    if ( mfn_valid(oos[idx]) )
    2.241 +    {
   2.242 +        /* Crush the current occupant. */
   2.243 +        _sh_resync(v, oos[idx], oos_va[idx]);
   2.244 +        perfc_incr(shadow_unsync_evict);
   2.245 +    }
   2.246 +    oos[idx] = gmfn;
   2.247 +    oos_va[idx] = va;
   2.248 +}
   2.249 +
   2.250 +/* Remove an MFN from the list of out-of-sync guest pagetables */
   2.251 +static void oos_hash_remove(struct vcpu *v, mfn_t gmfn)
   2.252 +{
   2.253 +    int idx;
   2.254 +    mfn_t *oos;
   2.255 +    struct domain *d = v->domain;
   2.256 +
   2.257 +    SHADOW_PRINTK("D%dV%d gmfn %lx\n",
   2.258 +                  v->domain->domain_id, v->vcpu_id, mfn_x(gmfn)); 
   2.259 +
   2.260 +    for_each_vcpu(d, v) 
   2.261 +    {
   2.262 +        oos = v->arch.paging.shadow.oos;
   2.263 +        idx = mfn_x(gmfn) % SHADOW_OOS_PAGES;
   2.264 +        if ( mfn_x(oos[idx]) != mfn_x(gmfn) )
   2.265 +            idx = (idx + 1) % SHADOW_OOS_PAGES;
   2.266 +        if ( mfn_x(oos[idx]) == mfn_x(gmfn) )
   2.267 +        {
   2.268 +            oos[idx] = _mfn(INVALID_MFN);
   2.269 +            return;
   2.270 +        }
   2.271 +    }
   2.272 +
   2.273 +    SHADOW_ERROR("gmfn %lx was OOS but not in hash table\n", mfn_x(gmfn));
   2.274 +    BUG();
   2.275 +}
   2.276 +
   2.277 +/* Pull a single guest page back into sync */
   2.278 +void sh_resync(struct vcpu *v, mfn_t gmfn)
   2.279 +{
   2.280 +    int idx;
   2.281 +    mfn_t *oos;
   2.282 +    unsigned long *oos_va;
   2.283 +    struct domain *d = v->domain;
   2.284 +
   2.285 +    for_each_vcpu(d, v) 
   2.286 +    {
   2.287 +        oos = v->arch.paging.shadow.oos;
   2.288 +        oos_va = v->arch.paging.shadow.oos_va;
   2.289 +        idx = mfn_x(gmfn) % SHADOW_OOS_PAGES;
   2.290 +        if ( mfn_x(oos[idx]) != mfn_x(gmfn) )
   2.291 +            idx = (idx + 1) % SHADOW_OOS_PAGES;
   2.292 +        
   2.293 +        if ( mfn_x(oos[idx]) == mfn_x(gmfn) )
   2.294 +        {
   2.295 +            _sh_resync(v, gmfn, oos_va[idx]);
   2.296 +            oos[idx] = _mfn(INVALID_MFN);
   2.297 +            return;
   2.298 +        }
   2.299 +    }
   2.300 +
   2.301 +    SHADOW_ERROR("gmfn %lx was OOS but not in hash table\n", mfn_x(gmfn));
   2.302 +    BUG();
   2.303 +}
   2.304 +
   2.305 +/* Figure out whether it's definitely safe not to sync this l1 table,
   2.306 + * by making a call out to the mode in which that shadow was made. */
   2.307 +static int sh_skip_sync(struct vcpu *v, mfn_t gl1mfn)
   2.308 +{
   2.309 +    struct page_info *pg = mfn_to_page(gl1mfn);
   2.310 +    if ( pg->shadow_flags & SHF_L1_32 )
   2.311 +        return SHADOW_INTERNAL_NAME(sh_safe_not_to_sync, 2)(v, gl1mfn);
   2.312 +    else if ( pg->shadow_flags & SHF_L1_PAE )
   2.313 +        return SHADOW_INTERNAL_NAME(sh_safe_not_to_sync, 3)(v, gl1mfn);
   2.314 +#if CONFIG_PAGING_LEVELS >= 4
   2.315 +    else if ( pg->shadow_flags & SHF_L1_64 )
   2.316 +        return SHADOW_INTERNAL_NAME(sh_safe_not_to_sync, 4)(v, gl1mfn);
   2.317 +#endif
   2.318 +    SHADOW_ERROR("gmfn 0x%lx was OOS but not shadowed as an l1.\n", 
   2.319 +                 mfn_x(gl1mfn));
   2.320 +    BUG();
   2.321 +    return 0; /* BUG() is no longer __attribute__((noreturn)). */
   2.322 +}
   2.323 +
   2.324 +
   2.325 +/* Pull all out-of-sync pages back into sync.  Pages brought out of sync
   2.326 + * on other vcpus are allowed to remain out of sync, but their contents
   2.327 + * will be made safe (TLB flush semantics); pages unsynced by this vcpu
   2.328 + * are brought back into sync and write-protected.  If skip != 0, we try
   2.329 + * to avoid resyncing at all if we think we can get away with it. */
   2.330 +void sh_resync_all(struct vcpu *v, int skip, int this, int others, int do_locking)
   2.331 +{
   2.332 +    int idx;
   2.333 +    struct vcpu *other;
   2.334 +    mfn_t *oos = v->arch.paging.shadow.oos;
   2.335 +    unsigned long *oos_va = v->arch.paging.shadow.oos_va;
   2.336 +
   2.337 +    SHADOW_PRINTK("d=%d, v=%d\n", v->domain->domain_id, v->vcpu_id);
   2.338 +
   2.339 +    ASSERT(do_locking || shadow_locked_by_me(v->domain));
   2.340 +
   2.341 +    if ( !this )
   2.342 +        goto resync_others;
   2.343 +
   2.344 +    if ( do_locking )
   2.345 +        shadow_lock(v->domain);
   2.346 +
   2.347 +    /* First: resync all of this vcpu's oos pages */
   2.348 +    for ( idx = 0; idx < SHADOW_OOS_PAGES; idx++ ) 
   2.349 +        if ( mfn_valid(oos[idx]) )
   2.350 +        {
   2.351 +            /* Write-protect and sync contents */
   2.352 +            _sh_resync(v, oos[idx], oos_va[idx]);
   2.353 +            oos[idx] = _mfn(INVALID_MFN);
   2.354 +        }
   2.355 +
   2.356 +    if ( do_locking )
   2.357 +        shadow_unlock(v->domain);
   2.358 +
   2.359 + resync_others:
   2.360 +    if ( !others )
   2.361 +        return;
   2.362 +
   2.363 +    /* Second: make all *other* vcpus' oos pages safe. */
   2.364 +    for_each_vcpu(v->domain, other)
   2.365 +    {
   2.366 +        if ( v == other ) 
   2.367 +            continue;
   2.368 +
   2.369 +        if ( do_locking )
   2.370 +            shadow_lock(v->domain);
   2.371 +
   2.372 +        oos = other->arch.paging.shadow.oos;
   2.373 +        oos_va = other->arch.paging.shadow.oos_va;
   2.374 +
   2.375 +        for ( idx = 0; idx < SHADOW_OOS_PAGES; idx++ ) 
   2.376 +        {
   2.377 +            if ( !mfn_valid(oos[idx]) )
   2.378 +                continue;
   2.379 +
   2.380 +            if ( skip )
   2.381 +            {
   2.382 +                /* Update the shadows and leave the page OOS. */
   2.383 +                if ( sh_skip_sync(v, oos[idx]) )
   2.384 +                    continue;
   2.385 +                _sh_resync_l1(other, oos[idx]);
   2.386 +            }
   2.387 +            else
   2.388 +            {
   2.389 +                /* Write-protect and sync contents */
   2.390 +                _sh_resync(other, oos[idx], oos_va[idx]);
   2.391 +                oos[idx] = _mfn(INVALID_MFN);
   2.392 +            }
   2.393 +        }
   2.394 +        
   2.395 +        if ( do_locking )
   2.396 +            shadow_unlock(v->domain);
   2.397 +    }
   2.398 +}
   2.399 +
   2.400 +/* Allow a shadowed page to go out of sync */
   2.401 +int sh_unsync(struct vcpu *v, mfn_t gmfn, unsigned long va)
   2.402 +{
   2.403 +    struct page_info *pg;
   2.404 +    
   2.405 +    ASSERT(shadow_locked_by_me(v->domain));
   2.406 +
   2.407 +    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx va %lx\n",
   2.408 +                  v->domain->domain_id, v->vcpu_id, mfn_x(gmfn), va);
   2.409 +
   2.410 +    pg = mfn_to_page(gmfn);
   2.411 + 
   2.412 +    /* Guest page must be shadowed *only* as L1 and *only* once when out
   2.413 +     * of sync.  Also, get out now if it's already out of sync. 
    2.414 +     * Finally, we can't safely unsync if some vcpus have paging disabled. */
   2.415 +    if ( pg->shadow_flags & 
   2.416 +         ((SHF_page_type_mask & ~SHF_L1_ANY) | SHF_out_of_sync) 
   2.417 +         || sh_page_has_multiple_shadows(pg)
   2.418 +         || !is_hvm_domain(v->domain)
   2.419 +         || !v->domain->arch.paging.shadow.oos_active )
   2.420 +        return 0;
   2.421 +
   2.422 +    pg->shadow_flags |= SHF_out_of_sync|SHF_oos_may_write;
   2.423 +    oos_hash_add(v, gmfn, va);
   2.424 +    perfc_incr(shadow_unsync);
   2.425 +    return 1;
   2.426 +}
   2.427 +
   2.428 +#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */
   2.429 +
   2.430  
   2.431  /**************************************************************************/
   2.432  /* Code for "promoting" a guest page to the point where the shadow code is
   2.433 @@ -440,6 +849,12 @@ void shadow_promote(struct vcpu *v, mfn_
   2.434  
   2.435      ASSERT(mfn_valid(gmfn));
   2.436  
   2.437 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
   2.438 +    /* Is the page already shadowed and out of sync? */
   2.439 +    if ( page_is_out_of_sync(page) ) 
   2.440 +        sh_resync(v, gmfn);
   2.441 +#endif
   2.442 +
   2.443      /* We should never try to promote a gmfn that has writeable mappings */
   2.444      ASSERT((page->u.inuse.type_info & PGT_type_mask) != PGT_writable_page
   2.445             || (page->u.inuse.type_info & PGT_count_mask) == 0
   2.446 @@ -463,7 +878,14 @@ void shadow_demote(struct vcpu *v, mfn_t
   2.447      clear_bit(type, &page->shadow_flags);
   2.448  
   2.449      if ( (page->shadow_flags & SHF_page_type_mask) == 0 )
   2.450 +    {
   2.451 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
   2.452 +        /* Was the page out of sync? */
   2.453 +        if ( page_is_out_of_sync(page) ) 
   2.454 +            oos_hash_remove(v, gmfn);
   2.455 +#endif 
   2.456          clear_bit(_PGC_page_table, &page->count_info);
   2.457 +    }
   2.458  }
   2.459  
   2.460  /**************************************************************************/
   2.461 @@ -1297,6 +1719,27 @@ static void sh_hash_audit_bucket(struct 
   2.462              /* Bad shadow flags on guest page? */
   2.463              BUG_ON( !(gpg->shadow_flags & (1<<sp->type)) );
   2.464              /* Bad type count on guest page? */
   2.465 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
   2.466 +            if ( sp->type == SH_type_l1_32_shadow
   2.467 +                 || sp->type == SH_type_l1_pae_shadow
   2.468 +                 || sp->type == SH_type_l1_64_shadow )
   2.469 +            {
   2.470 +                if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page
   2.471 +                     && (gpg->u.inuse.type_info & PGT_count_mask) != 0 )
   2.472 +                {
   2.473 +                    if ( !page_is_out_of_sync(gpg) )
   2.474 +                    {
   2.475 +                        SHADOW_ERROR("MFN %#lx shadowed (by %#"PRI_mfn")"
   2.476 +                                     " and not OOS but has typecount %#lx\n",
   2.477 +                                     sp->backpointer, 
   2.478 +                                     mfn_x(shadow_page_to_mfn(sp)), 
   2.479 +                                     gpg->u.inuse.type_info);
   2.480 +                        BUG();
   2.481 +                    }
   2.482 +                }
   2.483 +            }
   2.484 +            else /* Not an l1 */
   2.485 +#endif
   2.486              if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page 
   2.487                   && (gpg->u.inuse.type_info & PGT_count_mask) != 0 )
   2.488              {
   2.489 @@ -1608,7 +2051,8 @@ void sh_destroy_shadow(struct vcpu *v, m
   2.490  /* Remove all writeable mappings of a guest frame from the shadow tables 
   2.491   * Returns non-zero if we need to flush TLBs. 
   2.492   * level and fault_addr desribe how we found this to be a pagetable;
   2.493 - * level==0 means we have some other reason for revoking write access.*/
   2.494 + * level==0 means we have some other reason for revoking write access.
   2.495 + * If level==0 we are allowed to fail, returning -1. */
   2.496  
   2.497  int sh_remove_write_access(struct vcpu *v, mfn_t gmfn, 
   2.498                             unsigned int level,
   2.499 @@ -1659,7 +2103,12 @@ int sh_remove_write_access(struct vcpu *
   2.500          return 0;
   2.501  
   2.502      /* Early exit if it's already a pagetable, or otherwise not writeable */
   2.503 -    if ( sh_mfn_is_a_page_table(gmfn) 
   2.504 +    if ( (sh_mfn_is_a_page_table(gmfn)
   2.505 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
   2.506 +         /* Unless they've been allowed to go out of sync with their shadows */
   2.507 +           && !mfn_oos_may_write(gmfn)
   2.508 +#endif
   2.509 +         )
   2.510           || (pg->u.inuse.type_info & PGT_count_mask) == 0 )
   2.511          return 0;
   2.512  
   2.513 @@ -1676,7 +2125,7 @@ int sh_remove_write_access(struct vcpu *
   2.514      }
   2.515  
   2.516  #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
   2.517 -    if ( v == current && level != 0 )
   2.518 +    if ( v == current )
   2.519      {
   2.520          unsigned long gfn;
   2.521          /* Heuristic: there is likely to be only one writeable mapping,
   2.522 @@ -1690,6 +2139,8 @@ int sh_remove_write_access(struct vcpu *
   2.523                  return 1;                                                 \
   2.524          } while (0)
   2.525  
   2.526 +        if ( level == 0 && fault_addr )
   2.527 +            GUESS(fault_addr, 6);
   2.528          
   2.529          if ( v->arch.paging.mode->guest_levels == 2 )
   2.530          {
   2.531 @@ -1780,6 +2231,9 @@ int sh_remove_write_access(struct vcpu *
   2.532       * mapping -- ioreq page, grant mapping, &c. */
   2.533      if ( (mfn_to_page(gmfn)->u.inuse.type_info & PGT_count_mask) != 0 )
   2.534      {
   2.535 +        if ( level == 0 )
   2.536 +            return -1;
   2.537 +
   2.538          SHADOW_ERROR("can't remove write access to mfn %lx: guest has "
   2.539                        "%lu special-use mappings of it\n", mfn_x(gmfn),
   2.540                        (mfn_to_page(gmfn)->u.inuse.type_info&PGT_count_mask));
   2.541 @@ -2159,6 +2613,13 @@ static void sh_update_paging_modes(struc
   2.542          ASSERT(shadow_mode_translate(d));
   2.543          ASSERT(shadow_mode_external(d));
   2.544  
   2.545 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
   2.546 +        /* Need to resync all our pages now, because if a page goes out
   2.547 +         * of sync with paging enabled and is resynced with paging
   2.548 +         * disabled, the resync will go wrong. */
   2.549 +        shadow_resync_all(v, 0);
   2.550 +#endif /* OOS */
   2.551 +
   2.552          if ( !hvm_paging_enabled(v) )
   2.553          {
   2.554              /* When the guest has CR0.PG clear, we provide a 32-bit, non-PAE
   2.555 @@ -2254,6 +2715,27 @@ static void sh_update_paging_modes(struc
   2.556          //        This *does* happen, at least for CR4.PGE...
   2.557      }
   2.558  
   2.559 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
   2.560 +    /* We need to check that all the vcpus have paging enabled to
   2.561 +     * unsync PTs. */
   2.562 +    if ( is_hvm_domain(d) )
   2.563 +    {
   2.564 +        int pe = 1;
   2.565 +        struct vcpu *vptr;
   2.566 +
   2.567 +        for_each_vcpu(d, vptr)
   2.568 +        {
   2.569 +            if ( !hvm_paging_enabled(vptr) )
   2.570 +            {
   2.571 +                pe = 0;
   2.572 +                break;
   2.573 +            }
   2.574 +        }
   2.575 +
   2.576 +        d->arch.paging.shadow.oos_active = pe;
   2.577 +    }
   2.578 +#endif /* OOS */
   2.579 +
   2.580      v->arch.paging.mode->update_cr3(v, 0);
   2.581  }
   2.582  
   2.583 @@ -3044,7 +3526,11 @@ void shadow_audit_tables(struct vcpu *v)
   2.584  
   2.585      if ( !(SHADOW_AUDIT_ENABLE) )
   2.586          return;
   2.587 -    
   2.588 +
   2.589 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
   2.590 +    sh_oos_audit(v->domain);
   2.591 +#endif
   2.592 +
   2.593      if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_FULL )
   2.594          mask = ~1; /* Audit every table in the system */
   2.595      else 
     3.1 --- a/xen/arch/x86/mm/shadow/multi.c	Fri Jun 20 18:37:29 2008 +0100
     3.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Fri Jun 20 18:39:45 2008 +0100
     3.3 @@ -305,22 +305,54 @@ shadow_check_gwalk(struct vcpu *v, unsig
     3.4  }
     3.5  
     3.6  /* Remove write access permissions from a gwalk_t in a batch, and
     3.7 - * return OR-ed result for TLB flush hint
      3.8 + * return OR-ed flags indicating whether a TLB flush is needed and
      3.9 + * whether the guest pages must be re-walked.
    3.10 + *
     3.11 + * Syncing a page will remove write access to that page, but it may
    3.12 + * also give write access to other pages in the path. If we resync any
    3.13 + * pages, re-walk from the beginning.
    3.14   */
    3.15 +#define GW_RMWR_FLUSHTLB 1
    3.16 +#define GW_RMWR_REWALK   2
    3.17 +
    3.18  static inline uint32_t
    3.19  gw_remove_write_accesses(struct vcpu *v, unsigned long va, walk_t *gw)
    3.20  {
    3.21 -    int rc = 0;
    3.22 +    uint32_t rc = 0;
    3.23  
    3.24  #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
    3.25  #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
    3.26 -    rc = sh_remove_write_access(v, gw->l3mfn, 3, va);
    3.27 -#endif
    3.28 -    rc |= sh_remove_write_access(v, gw->l2mfn, 2, va);
    3.29 -#endif
    3.30 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
    3.31 +    if ( mfn_is_out_of_sync(gw->l3mfn) )
    3.32 +    {
    3.33 +        sh_resync(v, gw->l3mfn);
    3.34 +        rc = GW_RMWR_REWALK;
    3.35 +    }
    3.36 +    else
    3.37 +#endif /* OOS */
    3.38 +     if ( sh_remove_write_access(v, gw->l3mfn, 3, va) )
    3.39 +         rc = GW_RMWR_FLUSHTLB;
    3.40 +#endif /* GUEST_PAGING_LEVELS >= 4 */
    3.41 +
    3.42 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
    3.43 +    if ( mfn_is_out_of_sync(gw->l2mfn) )
    3.44 +    {
    3.45 +        sh_resync(v, gw->l2mfn);
    3.46 +        rc |= GW_RMWR_REWALK;
    3.47 +    }
    3.48 +    else
    3.49 +#endif /* OOS */
    3.50 +    if ( sh_remove_write_access(v, gw->l2mfn, 2, va) )
    3.51 +        rc |= GW_RMWR_FLUSHTLB;
    3.52 +#endif /* GUEST_PAGING_LEVELS >= 3 */
    3.53 +
    3.54      if ( !(guest_supports_superpages(v) &&
    3.55 -           (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)) )
    3.56 -        rc |= sh_remove_write_access(v, gw->l1mfn, 1, va);
    3.57 +           (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE))
    3.58 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
    3.59 +         && !mfn_is_out_of_sync(gw->l1mfn)
    3.60 +#endif /* OOS */
    3.61 +         && sh_remove_write_access(v, gw->l1mfn, 1, va) )
    3.62 +        rc |= GW_RMWR_FLUSHTLB;
    3.63  
    3.64      return rc;
    3.65  }
    3.66 @@ -882,7 +914,12 @@ static always_inline void
    3.67      
    3.68      // protect guest page tables
    3.69      //
    3.70 -    if ( unlikely((level == 1) && sh_mfn_is_a_page_table(target_mfn)) )
    3.71 +    if ( unlikely((level == 1) 
    3.72 +                  && sh_mfn_is_a_page_table(target_mfn)
    3.73 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC )
    3.74 +                  && !mfn_oos_may_write(target_mfn)
    3.75 +#endif /* OOS */
    3.76 +                  ) )
    3.77      {
    3.78          if ( shadow_mode_trap_reads(d) )
    3.79          {
    3.80 @@ -1125,6 +1162,9 @@ static int shadow_set_l4e(struct vcpu *v
    3.81              domain_crash(v->domain);
    3.82              return SHADOW_SET_ERROR;
    3.83          }
    3.84 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC )
    3.85 +        shadow_resync_all(v, 0);
    3.86 +#endif
    3.87      }
    3.88  
    3.89      /* Write the new entry */
    3.90 @@ -1163,12 +1203,17 @@ static int shadow_set_l3e(struct vcpu *v
    3.91               | (((unsigned long)sl3e) & ~PAGE_MASK));
    3.92      
    3.93      if ( shadow_l3e_get_flags(new_sl3e) & _PAGE_PRESENT )
    3.94 +    {
    3.95          /* About to install a new reference */        
    3.96          if ( !sh_get_ref(v, shadow_l3e_get_mfn(new_sl3e), paddr) )
    3.97          {
    3.98              domain_crash(v->domain);
    3.99              return SHADOW_SET_ERROR;
   3.100 -        } 
   3.101 +        }
   3.102 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC )
   3.103 +        shadow_resync_all(v, 0);
   3.104 +#endif
   3.105 +    }
   3.106  
   3.107      /* Write the new entry */
   3.108      shadow_write_entries(sl3e, &new_sl3e, 1, sl3mfn);
   3.109 @@ -1219,12 +1264,29 @@ static int shadow_set_l2e(struct vcpu *v
   3.110               | (((unsigned long)sl2e) & ~PAGE_MASK));
   3.111  
   3.112      if ( shadow_l2e_get_flags(new_sl2e) & _PAGE_PRESENT ) 
   3.113 +    {
   3.114 +        mfn_t sl1mfn = shadow_l2e_get_mfn(new_sl2e);
   3.115 +
   3.116          /* About to install a new reference */
   3.117 -        if ( !sh_get_ref(v, shadow_l2e_get_mfn(new_sl2e), paddr) )
   3.118 +        if ( !sh_get_ref(v, sl1mfn, paddr) )
   3.119          {
   3.120              domain_crash(v->domain);
   3.121              return SHADOW_SET_ERROR;
   3.122 -        } 
   3.123 +        }
   3.124 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
   3.125 +        {
   3.126 +            struct shadow_page_info *sp = mfn_to_shadow_page(sl1mfn);
   3.127 +            mfn_t gl1mfn = _mfn(sp->backpointer);
   3.128 +
   3.129 +            /* If the shadow is a fl1 then the backpointer contains
   3.130 +               the GFN instead of the GMFN, and it's definitely not
   3.131 +               OOS. */
   3.132 +            if ( (sp->type != SH_type_fl1_shadow) && mfn_valid(gl1mfn)
   3.133 +                 && mfn_is_out_of_sync(gl1mfn) )
   3.134 +                sh_resync(v, gl1mfn);
   3.135 +        }
   3.136 +#endif
   3.137 +    }
   3.138  
   3.139      /* Write the new entry */
   3.140  #if GUEST_PAGING_LEVELS == 2
   3.141 @@ -2544,6 +2606,97 @@ static int validate_gl1e(struct vcpu *v,
   3.142      return result;
   3.143  }
   3.144  
   3.145 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
   3.146 +/**************************************************************************/
   3.147 +/* Special validation function for re-syncing out-of-sync shadows. 
   3.148 + * Walks the *shadow* page, and for every entry that it finds,
   3.149 + * revalidates the guest entry that corresponds to it.
   3.150 + * N.B. This function is called with the vcpu that unsynced the page,
   3.151 + *      *not* the one that is causing it to be resynced. */
   3.152 +void sh_resync_l1(struct vcpu *v, mfn_t gmfn)
   3.153 +{
   3.154 +    mfn_t sl1mfn;
   3.155 +    shadow_l1e_t *sl1p;
   3.156 +    guest_l1e_t *gl1p, *gp;
   3.157 +    int rc = 0;
   3.158 +
   3.159 +    sl1mfn = get_shadow_status(v, gmfn, SH_type_l1_shadow);
   3.160 +    ASSERT(mfn_valid(sl1mfn)); /* Otherwise we would not have been called */
   3.161 +
   3.162 +    gp = sh_map_domain_page(gmfn);
   3.163 +    gl1p = gp;
   3.164 +
   3.165 +    SHADOW_FOREACH_L1E(sl1mfn, sl1p, &gl1p, 0, {
   3.166 +        rc |= validate_gl1e(v, gl1p, sl1mfn, sl1p);
   3.167 +    });
   3.168 +
   3.169 +    sh_unmap_domain_page(gp);
   3.170 +
   3.171 +    /* Setting shadow L1 entries should never need us to flush the TLB */
   3.172 +    ASSERT(!(rc & SHADOW_SET_FLUSH));
   3.173 +}
   3.174 +
   3.175 +/* Figure out whether it's definitely safe not to sync this l1 table. 
   3.176 + * That is: if we can tell that it's only used once, and that the 
   3.177 + * toplevel shadow responsible is not one of ours. 
   3.178 + * N.B. This function is called with the vcpu that required the resync, 
   3.179 + *      *not* the one that originally unsynced the page, but it is
   3.180 + *      called in the *mode* of the vcpu that unsynced it.  Clear?  Good. */
   3.181 +int sh_safe_not_to_sync(struct vcpu *v, mfn_t gl1mfn)
   3.182 +{
   3.183 +    struct shadow_page_info *sp;
   3.184 +    mfn_t smfn;
   3.185 +
   3.186 +    smfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow);
   3.187 +    ASSERT(mfn_valid(smfn)); /* Otherwise we would not have been called */
   3.188 +    
   3.189 +    /* Up to l2 */
   3.190 +    sp = mfn_to_shadow_page(smfn);
   3.191 +    if ( sp->count != 1 || !sp->up )
   3.192 +        return 0;
   3.193 +    smfn = _mfn(sp->up >> PAGE_SHIFT);
   3.194 +    ASSERT(mfn_valid(smfn));
   3.195 +
   3.196 +#if (SHADOW_PAGING_LEVELS == 4) 
   3.197 +    /* up to l3 */
   3.198 +    sp = mfn_to_shadow_page(smfn);
   3.199 +    if ( sp->count != 1 || !sp->up )
   3.200 +        return 0;
   3.201 +    smfn = _mfn(sp->up >> PAGE_SHIFT);
   3.202 +    ASSERT(mfn_valid(smfn));
   3.203 +
   3.204 +    /* up to l4 */
   3.205 +    sp = mfn_to_shadow_page(smfn);
   3.206 +    if ( sp->count != 1 
   3.207 +         || sh_type_is_pinnable(v, SH_type_l3_64_shadow) || !sp->up )
   3.208 +        return 0;
   3.209 +    smfn = _mfn(sp->up >> PAGE_SHIFT);
   3.210 +    ASSERT(mfn_valid(smfn));
   3.211 +
   3.212 +#if (GUEST_PAGING_LEVELS == 2)
   3.213 +    /* In 2-on-3 shadow mode the up pointer contains the link to the
   3.214 +     * shadow page, but the shadow_table contains only the first of the
    3.215 +     * four pages that make up the PAE top shadow table. */
   3.216 +    smfn = _mfn(mfn_x(smfn) & ~0x3UL);
   3.217 +#endif
   3.218 +
   3.219 +#endif
   3.220 +
   3.221 +    if ( pagetable_get_pfn(v->arch.shadow_table[0]) == mfn_x(smfn)
   3.222 +#if (SHADOW_PAGING_LEVELS == 3) 
   3.223 +         || pagetable_get_pfn(v->arch.shadow_table[1]) == mfn_x(smfn)
   3.224 +         || pagetable_get_pfn(v->arch.shadow_table[2]) == mfn_x(smfn)
   3.225 +         || pagetable_get_pfn(v->arch.shadow_table[3]) == mfn_x(smfn) 
   3.226 +#endif
   3.227 +        )
   3.228 +        return 0;
   3.229 +    
   3.230 +    /* Only in use in one toplevel shadow, and it's not the one we're 
   3.231 +     * running on */
   3.232 +    return 1;
   3.233 +}
   3.234 +#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */
   3.235 +
   3.236  
   3.237  /**************************************************************************/
   3.238  /* Functions which translate and install the shadows of arbitrary guest 
   3.239 @@ -2805,6 +2958,7 @@ static int sh_page_fault(struct vcpu *v,
   3.240      int r;
   3.241      fetch_type_t ft = 0;
   3.242      p2m_type_t p2mt;
   3.243 +    uint32_t rc;
   3.244  #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
   3.245      int fast_emul = 0;
   3.246  #endif
   3.247 @@ -2830,6 +2984,17 @@ static int sh_page_fault(struct vcpu *v,
   3.248          {
   3.249              fast_emul = 1;
   3.250              gmfn = _mfn(v->arch.paging.shadow.last_emulated_mfn);
   3.251 +
   3.252 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
   3.253 +            /* Fall back to the slow path if we're trying to emulate
   3.254 +               writes to an out of sync page. */
   3.255 +            if ( mfn_valid(gmfn) && mfn_is_out_of_sync(gmfn) )
   3.256 +            {
   3.257 +                v->arch.paging.last_write_emul_ok = 0;
   3.258 +                goto page_fault_slow_path;
   3.259 +            }
   3.260 +#endif /* OOS */
   3.261 +
   3.262              perfc_incr(shadow_fault_fast_emulate);
   3.263              goto early_emulation;
   3.264          }
   3.265 @@ -2855,6 +3020,31 @@ static int sh_page_fault(struct vcpu *v,
   3.266                                        sizeof(sl1e)) == 0)
   3.267                      && sh_l1e_is_magic(sl1e)) )
   3.268          {
   3.269 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
   3.270 +             /* First, need to check that this isn't an out-of-sync
   3.271 +              * shadow l1e.  If it is, we fall back to the slow path, which
   3.272 +              * will sync it up again. */
   3.273 +            {
   3.274 +                shadow_l2e_t sl2e;
   3.275 +                mfn_t gl1mfn;
   3.276 +               if ( (__copy_from_user(&sl2e,
   3.277 +                                       (sh_linear_l2_table(v)
   3.278 +                                        + shadow_l2_linear_offset(va)),
   3.279 +                                       sizeof(sl2e)) != 0)
   3.280 +                     || !(shadow_l2e_get_flags(sl2e) & _PAGE_PRESENT)
   3.281 +                     || !mfn_valid(gl1mfn = _mfn(mfn_to_shadow_page(
   3.282 +                                      shadow_l2e_get_mfn(sl2e))->backpointer))
   3.283 +                     || unlikely(mfn_is_out_of_sync(gl1mfn)) )
   3.284 +               {
   3.285 +                   /* Hit the slow path as if there had been no 
   3.286 +                    * shadow entry at all, and let it tidy up */
   3.287 +                   ASSERT(regs->error_code & PFEC_page_present);
   3.288 +                   regs->error_code ^= (PFEC_reserved_bit|PFEC_page_present);
   3.289 +                   goto page_fault_slow_path;
   3.290 +               }
   3.291 +            }
   3.292 +#endif /* SHOPT_OUT_OF_SYNC */
   3.293 +
   3.294              if ( sh_l1e_is_gnp(sl1e) )
   3.295              {
   3.296                  /* Not-present in a guest PT: pass to the guest as
   3.297 @@ -2890,6 +3080,10 @@ static int sh_page_fault(struct vcpu *v,
   3.298              return EXCRET_fault_fixed;
   3.299          }
   3.300      }
   3.301 +
   3.302 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
   3.303 + page_fault_slow_path:
   3.304 +#endif
   3.305  #endif /* SHOPT_FAST_FAULT_PATH */
   3.306  
   3.307      /* Detect if this page fault happened while we were already in Xen
   3.308 @@ -2904,7 +3098,21 @@ static int sh_page_fault(struct vcpu *v,
   3.309          return 0;
   3.310      }
   3.311  
   3.312 -    if ( guest_walk_tables(v, va, &gw, regs->error_code) != 0 )
   3.313 + rewalk:
   3.314 +    rc = guest_walk_tables(v, va, &gw, regs->error_code);
   3.315 +
   3.316 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
   3.317 +    if ( !(rc & _PAGE_PRESENT) )
   3.318 +        regs->error_code |= PFEC_page_present;
   3.319 +    else if ( regs->error_code & PFEC_page_present )
   3.320 +    {
   3.321 +            SHADOW_ERROR("OOS paranoia: Something is wrong in guest TLB"
   3.322 +                         " flushing. Have fun debugging it.\n");
   3.323 +            regs->error_code &= ~PFEC_page_present;
   3.324 +    }
   3.325 +#endif
   3.326 +
   3.327 +    if ( rc != 0 )
   3.328      {
   3.329          perfc_incr(shadow_fault_bail_real_fault);
   3.330          SHADOW_PRINTK("not a shadow fault\n");
   3.331 @@ -2948,7 +3156,10 @@ static int sh_page_fault(struct vcpu *v,
   3.332  
   3.333      shadow_lock(d);
   3.334  
   3.335 -    if ( gw_remove_write_accesses(v, va, &gw) )
   3.336 +    rc = gw_remove_write_accesses(v, va, &gw);
   3.337 +
   3.338 +    /* First bit set: Removed write access to a page. */
   3.339 +    if ( rc & GW_RMWR_FLUSHTLB )
   3.340      {
   3.341          /* Write permission removal is also a hint that other gwalks
   3.342           * overlapping with this one may be inconsistent
   3.343 @@ -2958,11 +3169,20 @@ static int sh_page_fault(struct vcpu *v,
   3.344          flush_tlb_mask(d->domain_dirty_cpumask);
   3.345      }
   3.346  
   3.347 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
   3.348 +    /* Second bit set: Resynced a page. Re-walk needed. */
   3.349 +    if ( rc & GW_RMWR_REWALK )
   3.350 +    {
   3.351 +        shadow_unlock(d);
   3.352 +        goto rewalk;
   3.353 +    }
   3.354 +#endif /* OOS */
   3.355 +
   3.356      if ( !shadow_check_gwalk(v, va, &gw) )
   3.357      {
   3.358          perfc_incr(shadow_inconsistent_gwalk);
   3.359          shadow_unlock(d);
   3.360 -        return EXCRET_fault_fixed;
   3.361 +        goto rewalk;
   3.362      }
   3.363  
   3.364      shadow_audit_tables(v);
   3.365 @@ -3001,7 +3221,12 @@ static int sh_page_fault(struct vcpu *v,
   3.366  #endif
   3.367  
   3.368      /* Need to emulate accesses to page tables */
   3.369 -    if ( sh_mfn_is_a_page_table(gmfn) )
   3.370 +    if ( sh_mfn_is_a_page_table(gmfn)
   3.371 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
   3.372 +         /* Unless they've been allowed to go out of sync with their shadows */
   3.373 +         && !mfn_is_out_of_sync(gmfn)
   3.374 +#endif
   3.375 +         )
   3.376      {
   3.377          if ( ft == ft_demand_write )
   3.378          {
   3.379 @@ -3215,6 +3440,7 @@ sh_invlpg(struct vcpu *v, unsigned long 
   3.380   * instruction should be issued on the hardware, or 0 if it's safe not
   3.381   * to do so. */
   3.382  {
   3.383 +    mfn_t sl1mfn;
   3.384      shadow_l2e_t sl2e;
   3.385      
   3.386      perfc_incr(shadow_invlpg);
   3.387 @@ -3278,13 +3504,65 @@ sh_invlpg(struct vcpu *v, unsigned long 
   3.388      // If so, then we'll need to flush the entire TLB (because that's
   3.389      // easier than invalidating all of the individual 4K pages).
   3.390      //
   3.391 -    if ( mfn_to_shadow_page(shadow_l2e_get_mfn(sl2e))->type
   3.392 +    sl1mfn = shadow_l2e_get_mfn(sl2e);
   3.393 +    if ( mfn_to_shadow_page(sl1mfn)->type
   3.394           == SH_type_fl1_shadow )
   3.395      {
   3.396          flush_tlb_local();
   3.397          return 0;
   3.398      }
   3.399  
   3.400 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
   3.401 +    /* Check to see if the SL1 is out of sync. */
   3.402 +    {
   3.403 +        mfn_t gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
   3.404 +        struct page_info *pg = mfn_to_page(gl1mfn);
   3.405 +        if ( mfn_valid(gl1mfn) 
   3.406 +             && page_is_out_of_sync(pg) )
   3.407 +        {
   3.408 +            /* The test above may give false positives, since we don't
   3.409 +             * hold the shadow lock yet.  Check again with the lock held. */
   3.410 +            shadow_lock(v->domain);
   3.411 +
   3.412 +            /* This must still be a copy-from-user because we didn't
   3.413 +             * have the shadow lock last time we checked, and the
   3.414 +             * higher-level shadows might have disappeared under our
   3.415 +             * feet. */
   3.416 +            if ( __copy_from_user(&sl2e, 
   3.417 +                                  sh_linear_l2_table(v)
   3.418 +                                  + shadow_l2_linear_offset(va),
   3.419 +                                  sizeof (sl2e)) != 0 )
   3.420 +            {
   3.421 +                perfc_incr(shadow_invlpg_fault);
   3.422 +                shadow_unlock(v->domain);
   3.423 +                return 0;
   3.424 +            }
   3.425 +
   3.426 +            if ( !(shadow_l2e_get_flags(sl2e) & _PAGE_PRESENT) )
   3.427 +            {
   3.428 +                shadow_unlock(v->domain);
   3.429 +                return 0;
   3.430 +            }
   3.431 +
   3.432 +            sl1mfn = shadow_l2e_get_mfn(sl2e);
   3.433 +            gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
   3.434 +            pg = mfn_to_page(gl1mfn);
   3.435 +            
   3.436 +            if ( likely(sh_mfn_is_a_page_table(gl1mfn)
   3.437 +                        && page_is_out_of_sync(pg) ) )
   3.438 +            {
   3.439 +                shadow_l1e_t *sl1;
   3.440 +                sl1 = sh_linear_l1_table(v) + shadow_l1_linear_offset(va);
   3.441 +                /* Remove the shadow entry that maps this VA */
   3.442 +                (void) shadow_set_l1e(v, sl1, shadow_l1e_empty(), sl1mfn);
   3.443 +            }
   3.444 +            shadow_unlock(v->domain);
    3.445 +            /* Need the invlpg, to pick up the disappearance of the sl1e */
   3.446 +            return 1;
   3.447 +        }
   3.448 +    }
   3.449 +#endif
   3.450 +
   3.451      return 1;
   3.452  }
   3.453  
   3.454 @@ -3710,6 +3988,13 @@ sh_update_cr3(struct vcpu *v, int do_loc
   3.455          return;
   3.456      }
   3.457  
   3.458 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
   3.459 +    /* Need to resync all the shadow entries on a TLB flush.  Resync
    3.460 +     * the current vcpu's OOS pages before switching to the new shadow
   3.461 +     * tables so that the VA hint is still valid.  */
   3.462 +    shadow_resync_current_vcpu(v, do_locking);
   3.463 +#endif
   3.464 +
   3.465      if ( do_locking ) shadow_lock(v->domain);
   3.466  
   3.467      ASSERT(shadow_locked_by_me(v->domain));
   3.468 @@ -3938,6 +4223,15 @@ sh_update_cr3(struct vcpu *v, int do_loc
   3.469  
   3.470      /* Release the lock, if we took it (otherwise it's the caller's problem) */
   3.471      if ( do_locking ) shadow_unlock(v->domain);
   3.472 +
   3.473 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
   3.474 +    /* Need to resync all the shadow entries on a TLB flush. We only
   3.475 +     * update the shadows, leaving the pages out of sync. Also, we try
   3.476 +     * to skip synchronization of shadows not mapped in the new
   3.477 +     * tables. */
   3.478 +    shadow_sync_other_vcpus(v, do_locking);
   3.479 +#endif
   3.480 +
   3.481  }
   3.482  
   3.483  
   3.484 @@ -4437,23 +4731,35 @@ sh_x86_emulate_cmpxchg8b(struct vcpu *v,
   3.485  
   3.486  #if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES
   3.487  
   3.488 -#define AUDIT_FAIL(_level, _fmt, _a...) do {                               \
   3.489 -    printk("Shadow %u-on-%u audit failed at level %i, index %i\n"         \
   3.490 -           "gl" #_level "mfn = %" PRI_mfn                              \
   3.491 -           " sl" #_level "mfn = %" PRI_mfn                             \
   3.492 -           " &gl" #_level "e = %p &sl" #_level "e = %p"                    \
   3.493 -           " gl" #_level "e = %" SH_PRI_gpte                              \
   3.494 -           " sl" #_level "e = %" SH_PRI_pte "\nError: " _fmt "\n",        \
   3.495 -           GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS,                      \
   3.496 -           _level, guest_index(gl ## _level ## e),                         \
   3.497 -           mfn_x(gl ## _level ## mfn), mfn_x(sl ## _level ## mfn),         \
   3.498 -           gl ## _level ## e, sl ## _level ## e,                           \
   3.499 -           gl ## _level ## e->l ## _level, sl ## _level ## e->l ## _level, \
   3.500 -           ##_a);                                                          \
   3.501 -    BUG();                                                                 \
   3.502 -    done = 1;                                                              \
   3.503 +#define AUDIT_FAIL(_level, _fmt, _a...) do {                            \
   3.504 +    printk("Shadow %u-on-%u audit failed at level %i, index %i\n"       \
   3.505 +           "gl" #_level "mfn = %" PRI_mfn                               \
   3.506 +           " sl" #_level "mfn = %" PRI_mfn                              \
   3.507 +           " &gl" #_level "e = %p &sl" #_level "e = %p"                 \
   3.508 +           " gl" #_level "e = %" SH_PRI_gpte                            \
   3.509 +           " sl" #_level "e = %" SH_PRI_pte "\nError: " _fmt "\n",      \
   3.510 +           GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS,                   \
   3.511 +               _level, guest_index(gl ## _level ## e),                  \
   3.512 +               mfn_x(gl ## _level ## mfn), mfn_x(sl ## _level ## mfn),  \
   3.513 +               gl ## _level ## e, sl ## _level ## e,                    \
   3.514 +               gl ## _level ## e->l ## _level, sl ## _level ## e->l ## _level, \
   3.515 +               ##_a);                                                   \
   3.516 +        BUG();                                                          \
   3.517 +        done = 1;                                                       \
   3.518  } while (0)
   3.519  
   3.520 +#define AUDIT_FAIL_MIN(_level, _fmt, _a...) do {                        \
   3.521 +    printk("Shadow %u-on-%u audit failed at level %i\n"                 \
   3.522 +           "gl" #_level "mfn = %" PRI_mfn                               \
   3.523 +           " sl" #_level "mfn = %" PRI_mfn                              \
   3.524 +           " Error: " _fmt "\n",                                        \
   3.525 +           GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS,                   \
   3.526 +           _level,                                                      \
   3.527 +           mfn_x(gl ## _level ## mfn), mfn_x(sl ## _level ## mfn),      \
   3.528 +           ##_a);                                                       \
   3.529 +    BUG();                                                              \
   3.530 +    done = 1;                                                           \
   3.531 +} while (0)
   3.532  
   3.533  static char * sh_audit_flags(struct vcpu *v, int level,
   3.534                                int gflags, int sflags) 
   3.535 @@ -4494,6 +4800,16 @@ int sh_audit_l1_table(struct vcpu *v, mf
   3.536      
   3.537      /* Follow the backpointer */
   3.538      gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
   3.539 +
   3.540 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
   3.541 +    /* Out-of-sync l1 shadows can contain anything: just check the OOS hash */
   3.542 +    if ( page_is_out_of_sync(mfn_to_page(gl1mfn)) )
   3.543 +    {
   3.544 +        oos_audit_hash_is_present(v->domain, gl1mfn);
   3.545 +        return 0;
   3.546 +    }
   3.547 +#endif
   3.548 +
   3.549      gl1e = gp = sh_map_domain_page(gl1mfn);
   3.550      SHADOW_FOREACH_L1E(sl1mfn, sl1e, &gl1e, done, {
   3.551  
   3.552 @@ -4574,6 +4890,13 @@ int sh_audit_l2_table(struct vcpu *v, mf
   3.553  
   3.554      /* Follow the backpointer */
   3.555      gl2mfn = _mfn(mfn_to_shadow_page(sl2mfn)->backpointer);
   3.556 +
   3.557 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
   3.558 +    /* Only L1's may be out of sync. */
   3.559 +    if ( page_is_out_of_sync(mfn_to_page(gl2mfn)) )
   3.560 +        AUDIT_FAIL_MIN(2, "gmfn %lx is out of sync", mfn_x(gl2mfn));
   3.561 +#endif
   3.562 +
   3.563      gl2e = gp = sh_map_domain_page(gl2mfn);
   3.564      SHADOW_FOREACH_L2E(sl2mfn, sl2e, &gl2e, done, v->domain, {
   3.565  
   3.566 @@ -4616,6 +4939,13 @@ int sh_audit_l3_table(struct vcpu *v, mf
   3.567  
   3.568      /* Follow the backpointer */
   3.569      gl3mfn = _mfn(mfn_to_shadow_page(sl3mfn)->backpointer);
   3.570 +
   3.571 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
   3.572 +    /* Only L1's may be out of sync. */
   3.573 +    if ( page_is_out_of_sync(mfn_to_page(gl3mfn)) )
   3.574 +        AUDIT_FAIL_MIN(3, "gmfn %lx is out of sync", mfn_x(gl3mfn));
   3.575 +#endif
   3.576 +
   3.577      gl3e = gp = sh_map_domain_page(gl3mfn);
   3.578      SHADOW_FOREACH_L3E(sl3mfn, sl3e, &gl3e, done, {
   3.579  
   3.580 @@ -4656,6 +4986,13 @@ int sh_audit_l4_table(struct vcpu *v, mf
   3.581  
   3.582      /* Follow the backpointer */
   3.583      gl4mfn = _mfn(mfn_to_shadow_page(sl4mfn)->backpointer);
   3.584 +
   3.585 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
   3.586 +    /* Only L1's may be out of sync. */
   3.587 +    if ( page_is_out_of_sync(mfn_to_page(gl4mfn)) )
   3.588 +        AUDIT_FAIL_MIN(4, "gmfn %lx is out of sync", mfn_x(gl4mfn));
   3.589 +#endif
   3.590 +
   3.591      gl4e = gp = sh_map_domain_page(gl4mfn);
   3.592      SHADOW_FOREACH_L4E(sl4mfn, sl4e, &gl4e, done, v->domain,
   3.593      {
     4.1 --- a/xen/arch/x86/mm/shadow/multi.h	Fri Jun 20 18:37:29 2008 +0100
     4.2 +++ b/xen/arch/x86/mm/shadow/multi.h	Fri Jun 20 18:39:45 2008 +0100
     4.3 @@ -115,3 +115,13 @@ SHADOW_INTERNAL_NAME(sh_destroy_monitor_
     4.4  
     4.5  extern struct paging_mode 
     4.6  SHADOW_INTERNAL_NAME(sh_paging_mode, GUEST_LEVELS);
     4.7 +
     4.8 +#if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
     4.9 +extern void 
    4.10 +SHADOW_INTERNAL_NAME(sh_resync_l1, GUEST_LEVELS)
    4.11 +    (struct vcpu *v, mfn_t gmfn);
    4.12 +
    4.13 +extern int
    4.14 +SHADOW_INTERNAL_NAME(sh_safe_not_to_sync, GUEST_LEVELS)
    4.15 +     (struct vcpu*v, mfn_t gmfn);
    4.16 +#endif
     5.1 --- a/xen/arch/x86/mm/shadow/private.h	Fri Jun 20 18:37:29 2008 +0100
     5.2 +++ b/xen/arch/x86/mm/shadow/private.h	Fri Jun 20 18:39:45 2008 +0100
     5.3 @@ -63,8 +63,9 @@ extern int shadow_audit_enable;
     5.4  #define SHOPT_SKIP_VERIFY         0x20  /* Skip PTE v'fy when safe to do so */
     5.5  #define SHOPT_VIRTUAL_TLB         0x40  /* Cache guest v->p translations */
     5.6  #define SHOPT_FAST_EMULATION      0x80  /* Fast write emulation */
     5.7 +#define SHOPT_OUT_OF_SYNC        0x100  /* Allow guest writes to L1 PTs */
     5.8  
     5.9 -#define SHADOW_OPTIMIZATIONS      0xff
    5.10 +#define SHADOW_OPTIMIZATIONS     0x1ff
    5.11  
    5.12  
    5.13  /******************************************************************************
    5.14 @@ -301,6 +302,62 @@ static inline int sh_type_is_pinnable(st
    5.15  #define SHF_PAE (SHF_L1_PAE|SHF_FL1_PAE|SHF_L2_PAE|SHF_L2H_PAE)
    5.16  #define SHF_64  (SHF_L1_64|SHF_FL1_64|SHF_L2_64|SHF_L2H_64|SHF_L3_64|SHF_L4_64)
    5.17  
    5.18 +#define SHF_L1_ANY  (SHF_L1_32|SHF_L1_PAE|SHF_L1_64)
    5.19 +
    5.20 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
    5.21 +/* Flags for a guest L1 page table which is shadowed but not write-protected.
    5.22 + * If either flag is set, then *only* L1 shadows (SHF_L1_*) are allowed.
    5.23 + *
    5.24 + * out_of_sync indicates that the shadow tables may not reflect the
    5.25 + * guest tables.  If it is clear, then the shadow tables *must* reflect
    5.26 + * the guest tables.
    5.27 + *
    5.28 + * oos_may_write indicates that a page may have writable mappings.
    5.29 + *
    5.30 + * Most of the time the two flags are set and cleared together.  There is a
    5.31 + * short window during resync when oos_may_write is already clear but
    5.32 + * out_of_sync is not.  A codepath that runs in that window and is sensitive
    5.33 + * to OOS issues may need to check oos_may_write rather than out_of_sync.
    5.34 + */
    5.35 +#define SHF_out_of_sync (1u<<30)
    5.36 +#define SHF_oos_may_write (1u<<29)
    5.37 +#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */
    5.38 +
    5.39 +static inline int sh_page_has_multiple_shadows(struct page_info *pg)
    5.40 +{
    5.41 +    u32 shadows;
    5.42 +    if ( !(pg->count_info & PGC_page_table) )
    5.43 +        return 0;
    5.44 +    shadows = pg->shadow_flags & SHF_page_type_mask;
    5.45 +    /* More than one type bit set in shadow-flags? */
    5.46 +    return ( (shadows & ~(1UL << find_first_set_bit(shadows))) != 0 );
    5.47 +}
    5.48 +
    5.49 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
    5.50 +/* The caller must verify this is reasonable to call; i.e., valid mfn,
    5.51 + * domain is translated, &c */
    5.52 +static inline int page_is_out_of_sync(struct page_info *p) 
    5.53 +{
    5.54 +    return (p->count_info & PGC_page_table)
    5.55 +        && (p->shadow_flags & SHF_out_of_sync);
    5.56 +}
    5.57 +
    5.58 +static inline int mfn_is_out_of_sync(mfn_t gmfn) 
    5.59 +{
    5.60 +    return page_is_out_of_sync(mfn_to_page(mfn_x(gmfn)));
    5.61 +}
    5.62 +
    5.63 +static inline int page_oos_may_write(struct page_info *p) 
    5.64 +{
    5.65 +    return (p->count_info & PGC_page_table)
    5.66 +        && (p->shadow_flags & SHF_oos_may_write);
    5.67 +}
    5.68 +
    5.69 +static inline int mfn_oos_may_write(mfn_t gmfn) 
    5.70 +{
    5.71 +    return page_oos_may_write(mfn_to_page(mfn_x(gmfn)));
    5.72 +}
    5.73 +#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */
    5.74  
    5.75  /******************************************************************************
    5.76   * Various function declarations 
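The inline predicates above let callers ask cheaply, from the page_info alone,
whether a guest frame is currently unsynced (page_is_out_of_sync /
mfn_is_out_of_sync) or merely still allowed to hold writable mappings
(page_oos_may_write / mfn_oos_may_write); the get_page_type() change in mm.c at
the top of this patch is an example of the second kind of caller.  A hedged
sketch of the first kind, i.e. code about to consume a guest L1's contents
(hypothetical helper; the real consumers are in multi.c):

    /* Hypothetical sketch: before walking a guest L1, make sure any pending
     * guest writes to it have been propagated into its shadows. */
    static void ensure_gl1_in_sync(struct vcpu *v, mfn_t gl1mfn)
    {
        if ( mfn_valid(gl1mfn) && mfn_is_out_of_sync(gl1mfn) )
            sh_resync(v, gl1mfn);   /* declared in the next hunk */
    }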
    5.77 @@ -351,7 +408,50 @@ int shadow_write_guest_entry(struct vcpu
    5.78  int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p,
    5.79                                 intpte_t *old, intpte_t new, mfn_t gmfn);
    5.80  
    5.81 +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
    5.82 +/* Allow a shadowed page to go out of sync */
    5.83 +int sh_unsync(struct vcpu *v, mfn_t gmfn, unsigned long va);
    5.84  
    5.85 +/* Pull an out-of-sync page back into sync. */
    5.86 +void sh_resync(struct vcpu *v, mfn_t gmfn);
    5.87 +
    5.88 +/* Pull all out-of-sync shadows back into sync.  If skip != 0, we try
    5.89 + * to avoid resyncing where we think we can get away with it. */
    5.90 +
    5.91 +void sh_resync_all(struct vcpu *v, int skip, int this, int others, int do_locking);
    5.92 +
    5.93 +static inline void
    5.94 +shadow_resync_all(struct vcpu *v, int do_locking)
    5.95 +{
    5.96 +    sh_resync_all(v,
    5.97 +                  0 /* skip */,
    5.98 +                  1 /* this */,
    5.99 +                  1 /* others */,
   5.100 +                  do_locking);
   5.101 +}
   5.102 +
   5.103 +static inline void
   5.104 +shadow_resync_current_vcpu(struct vcpu *v, int do_locking)
   5.105 +{
   5.106 +    sh_resync_all(v,
   5.107 +                  0 /* skip */,
   5.108 +                  1 /* this */, 
   5.109 +                  0 /* others */,
   5.110 +                  do_locking);
   5.111 +}
   5.112 +
   5.113 +static inline void
   5.114 +shadow_sync_other_vcpus(struct vcpu *v, int do_locking)
   5.115 +{
   5.116 +    sh_resync_all(v,
   5.117 +                  1 /* skip */, 
   5.118 +                  0 /* this */,
   5.119 +                  1 /* others */,
   5.120 +                  do_locking);
   5.121 +}
   5.122 +
   5.123 +void oos_audit_hash_is_present(struct domain *d, mfn_t gmfn);
   5.124 +#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */
   5.125  
   5.126  /******************************************************************************
   5.127   * Flags used in the return value of the shadow_set_lXe() functions...
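The three wrappers above spell out the useful combinations of sh_resync_all()'s
arguments: shadow_resync_all() resyncs every out-of-sync page of every vcpu,
shadow_resync_current_vcpu() only the calling vcpu's own pages, and
shadow_sync_other_vcpus() sweeps the other vcpus' pages with skip set, so resyncs
that the heuristics consider avoidable can be left out.  A hedged usage sketch
(hypothetical call site; where these are really invoked is part of the multi.c
changes, and the meaning of do_locking, i.e. whether sh_resync_all() takes the
shadow lock itself, is assumed here):

    /* Hypothetical sketch of a pagetable-switch path. */
    static void example_pt_switch_resync(struct vcpu *v)
    {
        /* This vcpu is about to run on possibly stale shadows: bring its
         * own out-of-sync pages fully back in sync first... */
        shadow_resync_current_vcpu(v, 1 /* assumed: take the shadow lock */);
        /* ...and sweep the other vcpus' pages, skipping any that look safe
         * to leave out of sync for now. */
        shadow_sync_other_vcpus(v, 1 /* assumed: take the shadow lock */);
    }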
     6.1 --- a/xen/arch/x86/mm/shadow/types.h	Fri Jun 20 18:37:29 2008 +0100
     6.2 +++ b/xen/arch/x86/mm/shadow/types.h	Fri Jun 20 18:39:45 2008 +0100
     6.3 @@ -438,6 +438,10 @@ struct shadow_walk_t
     6.4  #define sh_guess_wrmap             INTERNAL_NAME(sh_guess_wrmap)
     6.5  #define sh_clear_shadow_entry      INTERNAL_NAME(sh_clear_shadow_entry)
     6.6  
     6.7 +#if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
     6.8 +#define sh_resync_l1               INTERNAL_NAME(sh_resync_l1)
     6.9 +#define sh_safe_not_to_sync        INTERNAL_NAME(sh_safe_not_to_sync)
    6.10 +#endif
    6.11  
    6.12  /* The sh_guest_(map|get)_* functions depends on Xen's paging levels */
    6.13  #define sh_guest_map_l1e \
     7.1 --- a/xen/include/asm-x86/domain.h	Fri Jun 20 18:37:29 2008 +0100
     7.2 +++ b/xen/include/asm-x86/domain.h	Fri Jun 20 18:39:45 2008 +0100
     7.3 @@ -103,6 +103,9 @@ struct shadow_domain {
     7.4       * emulation and remove write permission
     7.5       */
     7.6      atomic_t          gtable_dirty_version;
     7.7 +
     7.8 +    /* Out-of-sync shadows active for this domain? */
     7.9 +    int oos_active;
    7.10  };
    7.11  
    7.12  struct shadow_vcpu {
    7.13 @@ -122,6 +125,10 @@ struct shadow_vcpu {
    7.14      unsigned long last_emulated_frame;
    7.15      /* Last MFN that we emulated a write successfully */
    7.16      unsigned long last_emulated_mfn;
    7.17 +
    7.18 +    /* Shadow out-of-sync: pages that this vcpu has let go out of sync */
    7.19 +    mfn_t oos[SHADOW_OOS_PAGES];
    7.20 +    unsigned long oos_va[SHADOW_OOS_PAGES];
    7.21  };
    7.22  
    7.23  /************************************************/
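Each vcpu therefore remembers at most SHADOW_OOS_PAGES frames it has let go out
of sync, plus, for each of them, the guest virtual address involved when it was
unsynced (oos_va); that address can later serve as a hint when the page's
writable mappings have to be hunted down at resync time (the new
shadow_writeable_h_6 "unsync va" counter below counts hits of such a hint).  A
hedged sketch of the lookup side, assuming entries are placed at
mfn % SHADOW_OOS_PAGES (hypothetical helper; the real bookkeeping is in
common.c):

    /* Hypothetical sketch: if gmfn is one of the pages this vcpu has let go
     * out of sync, return the va hint recorded at unsync time, else 0. */
    static unsigned long oos_va_hint(struct vcpu *v, mfn_t gmfn)
    {
        int idx = mfn_x(gmfn) % SHADOW_OOS_PAGES;

        if ( mfn_x(v->arch.paging.shadow.oos[idx]) == mfn_x(gmfn) )
            return v->arch.paging.shadow.oos_va[idx];
        return 0;
    }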
     8.1 --- a/xen/include/asm-x86/mm.h	Fri Jun 20 18:37:29 2008 +0100
     8.2 +++ b/xen/include/asm-x86/mm.h	Fri Jun 20 18:39:45 2008 +0100
     8.3 @@ -130,6 +130,9 @@ static inline u32 pickle_domptr(struct d
     8.4  /* The order of the largest allocation unit we use for shadow pages */
     8.5  #define SHADOW_MAX_ORDER 2 /* Need up to 16k allocs for 32-bit on PAE/64 */
     8.6  
     8.7 +/* The number of out-of-sync shadows we allow per vcpu (prime, please) */
     8.8 +#define SHADOW_OOS_PAGES 7
     8.9 +
    8.10  #define page_get_owner(_p)    (unpickle_domptr((_p)->u.inuse._domain))
    8.11  #define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
    8.12  
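The "prime, please" hint matters because the natural way to index a small
per-vcpu table like this is by reducing the frame number modulo the table size,
and guest pagetable frames often arrive at regular, power-of-two-like strides.
A short worked example (the modulo placement is an assumption here; the actual
policy lives in common.c):

    idx = mfn_x(gmfn) % SHADOW_OOS_PAGES;   /* assumed placement policy */

    /* Stride-8 example with 7 slots: 0x1000 % 7 = 1, 0x1008 % 7 = 2,
     * 0x1010 % 7 = 3, ... so every slot gets used before any repeats.
     * With 8 slots instead, all three mfns would land in slot 0 and
     * force constant evictions. */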
     9.1 --- a/xen/include/asm-x86/perfc_defn.h	Fri Jun 20 18:37:29 2008 +0100
     9.2 +++ b/xen/include/asm-x86/perfc_defn.h	Fri Jun 20 18:39:45 2008 +0100
     9.3 @@ -80,6 +80,7 @@ PERFCOUNTER(shadow_writeable_h_2,  "shad
     9.4  PERFCOUNTER(shadow_writeable_h_3,  "shadow writeable: 64b w2k3")
     9.5  PERFCOUNTER(shadow_writeable_h_4,  "shadow writeable: linux low/solaris")
     9.6  PERFCOUNTER(shadow_writeable_h_5,  "shadow writeable: linux high")
     9.7 +PERFCOUNTER(shadow_writeable_h_6,  "shadow writeable: unsync va")
     9.8  PERFCOUNTER(shadow_writeable_bf,   "shadow writeable brute-force")
     9.9  PERFCOUNTER(shadow_mappings,       "shadow removes all mappings")
    9.10  PERFCOUNTER(shadow_mappings_bf,    "shadow rm-mappings brute-force")
    9.11 @@ -101,4 +102,8 @@ PERFCOUNTER(shadow_em_ex_pt,       "shad
    9.12  PERFCOUNTER(shadow_em_ex_non_pt,   "shadow extra non-pt-write op")
    9.13  PERFCOUNTER(shadow_em_ex_fail,     "shadow extra emulation failed")
    9.14  
    9.15 +PERFCOUNTER(shadow_unsync,         "shadow OOS unsyncs")
    9.16 +PERFCOUNTER(shadow_unsync_evict,   "shadow OOS evictions")
    9.17 +PERFCOUNTER(shadow_resync,         "shadow OOS resyncs")
    9.18 +
    9.19  /*#endif*/ /* __XEN_PERFC_DEFN_H__ */