ia64/xen-unstable

changeset 17080:03d13b696027

Provide a fast write-emulation path that avoids taking the shadow lock.

Shadow fault handling can be split into two parts: the first
part does bookkeeping work such as validating the guest page
table and fixing up the shadow tables, and the second part
performs the write emulation.

In one scenario, however, the first part can be skipped. If
the previous shadow fault on a virtual frame was handled by
a successful write emulation, the next shadow fault on the
same frame is very likely to reach the write emulation logic
again, so re-walking the first part, which the last fault
already covered, is wasted work. In that case we can jump to
the emulation code early, acquiring no lock until the final
shadow validation for the write emulation. Measured with
perfc counters on a 64-bit SMP HVM guest running a kernel
build, 89% of all shadow write emulations take this fast
path.
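
In outline, the fast path added at the top of sh_page_fault()
works as sketched below (a condensed excerpt of the change;
the full logic, including how the heuristic is recorded and
invalidated, is in the diff that follows):

    #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
        /* Did the last shadow fault on this vcpu end in a successful
         * write emulation of this same virtual frame? */
        if ( v->arch.paging.last_write_emul_ok
             && v->arch.paging.shadow.last_emulated_frame == (va >> PAGE_SHIFT) )
        {
            /* Only a plain write to a present page may skip validation. */
            if ( regs->error_code == (PFEC_write_access | PFEC_page_present) )
            {
                fast_emul = 1;
                gmfn = v->arch.paging.shadow.last_emulated_mfn;
                perfc_incr(shadow_fault_fast_emulate);
                goto early_emulation; /* no shadow lock taken on this path */
            }
            v->arch.paging.last_write_emul_ok = 0;
        }
    #endif

The heuristic is cleared whenever it could go stale: when a
fast-path emulation fails or an event is pending at emulation
time, on sh_invlpg() and sh_update_cr3(), and when the shadow
pages it refers to are freed.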

Signed-off-by: Kevin Tian <kevin.tian@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Feb 15 12:33:11 2008 +0000 (2008-02-15)
parents 38b532eea3bf
children 29c03bc32d3e
files xen/arch/x86/mm/shadow/common.c xen/arch/x86/mm/shadow/multi.c xen/arch/x86/mm/shadow/private.h xen/include/asm-x86/domain.h xen/include/asm-x86/perfc_defn.h
line diff
     1.1 --- a/xen/arch/x86/mm/shadow/common.c	Fri Feb 15 09:54:28 2008 +0000
     1.2 +++ b/xen/arch/x86/mm/shadow/common.c	Fri Feb 15 12:33:11 2008 +0000
     1.3 @@ -1039,13 +1039,18 @@ void shadow_free(struct domain *d, mfn_t
     1.4  
     1.5      for ( i = 0; i < 1<<order; i++ ) 
     1.6      {
     1.7 -#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
     1.8 +#if SHADOW_OPTIMIZATIONS & (SHOPT_WRITABLE_HEURISTIC | SHOPT_FAST_EMULATION)
     1.9          struct vcpu *v;
    1.10          for_each_vcpu(d, v) 
    1.11          {
    1.12 +#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
    1.13              /* No longer safe to look for a writeable mapping in this shadow */
    1.14              if ( v->arch.paging.shadow.last_writeable_pte_smfn == mfn_x(smfn) + i ) 
    1.15                  v->arch.paging.shadow.last_writeable_pte_smfn = 0;
    1.16 +#endif
    1.17 +#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
    1.18 +            v->arch.paging.last_write_emul_ok = 0;
    1.19 +#endif
    1.20          }
    1.21  #endif
    1.22          /* Strip out the type: this is now a free shadow page */
     2.1 --- a/xen/arch/x86/mm/shadow/multi.c	Fri Feb 15 09:54:28 2008 +0000
     2.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Fri Feb 15 12:33:11 2008 +0000
     2.3 @@ -2623,13 +2623,13 @@ sh_map_and_validate_gl1e(struct vcpu *v,
     2.4  static inline void check_for_early_unshadow(struct vcpu *v, mfn_t gmfn)
     2.5  {
     2.6  #if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
     2.7 -    if ( v->arch.paging.shadow.last_emulated_mfn == mfn_x(gmfn) &&
     2.8 -         sh_mfn_is_a_page_table(gmfn) )
     2.9 +    if ( v->arch.paging.shadow.last_emulated_mfn_for_unshadow == mfn_x(gmfn)
    2.10 +         && sh_mfn_is_a_page_table(gmfn) )
    2.11      {
    2.12          perfc_incr(shadow_early_unshadow);
    2.13          sh_remove_shadows(v, gmfn, 1, 0 /* Fast, can fail to unshadow */ );
    2.14      }
    2.15 -    v->arch.paging.shadow.last_emulated_mfn = mfn_x(gmfn);
    2.16 +    v->arch.paging.shadow.last_emulated_mfn_for_unshadow = mfn_x(gmfn);
    2.17  #endif
    2.18  }
    2.19  
    2.20 @@ -2637,7 +2637,7 @@ static inline void check_for_early_unsha
    2.21  static inline void reset_early_unshadow(struct vcpu *v)
    2.22  {
    2.23  #if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
    2.24 -    v->arch.paging.shadow.last_emulated_mfn = INVALID_MFN;
    2.25 +    v->arch.paging.shadow.last_emulated_mfn_for_unshadow = INVALID_MFN;
    2.26  #endif
    2.27  }
    2.28  
    2.29 @@ -2744,12 +2744,39 @@ static int sh_page_fault(struct vcpu *v,
    2.30      int r;
    2.31      fetch_type_t ft = 0;
    2.32      p2m_type_t p2mt;
    2.33 +#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
    2.34 +    int fast_emul = 0;
    2.35 +#endif
    2.36  
    2.37      SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u, rip=%lx\n",
    2.38                    v->domain->domain_id, v->vcpu_id, va, regs->error_code,
    2.39                    regs->rip);
    2.40  
    2.41      perfc_incr(shadow_fault);
    2.42 +
    2.43 +#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
     2.44 +    /* If the faulting frame was successfully emulated by the last
     2.45 +     * shadow fault, this fault is very likely to take the same
     2.46 +     * emulation path, so try to emulate early to avoid lock acquisition.
     2.47 +     */
    2.48 +    if ( v->arch.paging.last_write_emul_ok
    2.49 +         && v->arch.paging.shadow.last_emulated_frame == (va >> PAGE_SHIFT) ) 
    2.50 +    {
     2.51 +        /* check that the error code is exactly 3 (write|present); else
     2.52 +         * fall back to the normal path in case some validation is required
     2.53 +         */
    2.54 +        if ( regs->error_code == (PFEC_write_access | PFEC_page_present) )
    2.55 +        {
    2.56 +            fast_emul = 1;
    2.57 +            gmfn = v->arch.paging.shadow.last_emulated_mfn;
    2.58 +            perfc_incr(shadow_fault_fast_emulate);
    2.59 +            goto early_emulation;
    2.60 +        }
    2.61 +        else
    2.62 +            v->arch.paging.last_write_emul_ok = 0;
    2.63 +    }
    2.64 +#endif
    2.65 +
    2.66      //
    2.67      // XXX: Need to think about eventually mapping superpages directly in the
    2.68      //      shadow (when possible), as opposed to splintering them into a
    2.69 @@ -2960,6 +2987,17 @@ static int sh_page_fault(struct vcpu *v,
    2.70          goto done;
    2.71      }
    2.72  
    2.73 +    /*
    2.74 +     * We don't need to hold the lock for the whole emulation; we will
    2.75 +     * take it again when we write to the pagetables.
    2.76 +     */
    2.77 +    sh_audit_gw(v, &gw);
    2.78 +    shadow_audit_tables(v);
    2.79 +    shadow_unlock(d);
    2.80 +
    2.81 +#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
    2.82 + early_emulation:
    2.83 +#endif
    2.84      if ( is_hvm_domain(d) )
    2.85      {
    2.86          /*
    2.87 @@ -2971,6 +3009,13 @@ static int sh_page_fault(struct vcpu *v,
    2.88           */
    2.89          if ( unlikely(hvm_event_pending(v)) )
    2.90          {
    2.91 +#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
    2.92 +            if ( fast_emul )
    2.93 +            {
    2.94 +                perfc_incr(shadow_fault_fast_emulate_fail);
    2.95 +                v->arch.paging.last_write_emul_ok = 0;
    2.96 +            }
    2.97 +#endif
    2.98              gdprintk(XENLOG_DEBUG, "write to pagetable during event "
    2.99                       "injection: cr2=%#lx, mfn=%#lx\n", 
   2.100                       va, mfn_x(gmfn));
   2.101 @@ -2982,14 +3027,6 @@ static int sh_page_fault(struct vcpu *v,
   2.102      SHADOW_PRINTK("emulate: eip=%#lx esp=%#lx\n", 
   2.103                    (unsigned long)regs->eip, (unsigned long)regs->esp);
   2.104  
   2.105 -    /*
   2.106 -     * We don't need to hold the lock for the whole emulation; we will
   2.107 -     * take it again when we write to the pagetables.
   2.108 -     */
   2.109 -    sh_audit_gw(v, &gw);
   2.110 -    shadow_audit_tables(v);
   2.111 -    shadow_unlock(d);
   2.112 -
   2.113      emul_ops = shadow_init_emulation(&emul_ctxt, regs);
   2.114  
   2.115      r = x86_emulate(&emul_ctxt.ctxt, emul_ops);
   2.116 @@ -3001,15 +3038,44 @@ static int sh_page_fault(struct vcpu *v,
   2.117       */
   2.118      if ( r == X86EMUL_UNHANDLEABLE )
   2.119      {
   2.120 +        perfc_incr(shadow_fault_emulate_failed);
   2.121 +#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
   2.122 +        if ( fast_emul )
   2.123 +        {
   2.124 +            perfc_incr(shadow_fault_fast_emulate_fail);
   2.125 +            v->arch.paging.last_write_emul_ok = 0;
   2.126 +        }
   2.127 +#endif
   2.128          SHADOW_PRINTK("emulator failure, unshadowing mfn %#lx\n", 
   2.129                         mfn_x(gmfn));
   2.130 -        perfc_incr(shadow_fault_emulate_failed);
   2.131          /* If this is actually a page table, then we have a bug, and need 
   2.132           * to support more operations in the emulator.  More likely, 
   2.133           * though, this is a hint that this page should not be shadowed. */
   2.134          shadow_remove_all_shadows(v, gmfn);
   2.135      }
   2.136  
   2.137 +#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
    2.138 +    /* Record information about a successful emulation as a heuristic
    2.139 +     * for the next fault on the same frame. Be careful to verify that
    2.140 +     * the frame is still a page table: an unshadow triggered during
    2.141 +     * write emulation normally requires a re-sync with the guest page
    2.142 +     * table to recover r/w permission, and recording such a case
    2.143 +     * would cause unexpected extra shadow faults, because propagation
    2.144 +     * would be skipped.
    2.145 +     */
   2.146 +    if ( (r == X86EMUL_OKAY) && sh_mfn_is_a_page_table(gmfn) )
   2.147 +    {
   2.148 +        if ( !fast_emul )
   2.149 +        {
   2.150 +            v->arch.paging.shadow.last_emulated_frame = va >> PAGE_SHIFT;
   2.151 +            v->arch.paging.shadow.last_emulated_mfn = gmfn;
   2.152 +            v->arch.paging.last_write_emul_ok = 1;
   2.153 +        }
   2.154 +    }
   2.155 +    else if ( fast_emul )
   2.156 +        v->arch.paging.last_write_emul_ok = 0;
   2.157 +#endif
   2.158 +
   2.159  #if GUEST_PAGING_LEVELS == 3 /* PAE guest */
   2.160      if ( r == X86EMUL_OKAY ) {
   2.161          int i;
   2.162 @@ -3079,6 +3145,10 @@ sh_invlpg(struct vcpu *v, unsigned long 
   2.163      vtlb_flush(v);
   2.164  #endif
   2.165  
   2.166 +#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
   2.167 +    v->arch.paging.last_write_emul_ok = 0;
   2.168 +#endif
   2.169 +
   2.170      /* First check that we can safely read the shadow l2e.  SMP/PAE linux can
   2.171       * run as high as 6% of invlpg calls where we haven't shadowed the l2 
   2.172       * yet. */
   2.173 @@ -3815,6 +3885,10 @@ sh_update_cr3(struct vcpu *v, int do_loc
   2.174      vtlb_flush(v);
   2.175  #endif
   2.176  
   2.177 +#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
   2.178 +    v->arch.paging.last_write_emul_ok = 0;
   2.179 +#endif
   2.180 +
   2.181      /* Release the lock, if we took it (otherwise it's the caller's problem) */
   2.182      if ( do_locking ) shadow_unlock(v->domain);
   2.183  }
     3.1 --- a/xen/arch/x86/mm/shadow/private.h	Fri Feb 15 09:54:28 2008 +0000
     3.2 +++ b/xen/arch/x86/mm/shadow/private.h	Fri Feb 15 12:33:11 2008 +0000
     3.3 @@ -62,8 +62,9 @@ extern int shadow_audit_enable;
     3.4  #define SHOPT_LINUX_L3_TOPLEVEL   0x10  /* Pin l3es on early 64bit linux */
     3.5  #define SHOPT_SKIP_VERIFY         0x20  /* Skip PTE v'fy when safe to do so */
     3.6  #define SHOPT_VIRTUAL_TLB         0x40  /* Cache guest v->p translations */
     3.7 +#define SHOPT_FAST_EMULATION      0x80  /* Fast write emulation */
     3.8  
     3.9 -#define SHADOW_OPTIMIZATIONS      0x7f
    3.10 +#define SHADOW_OPTIMIZATIONS      0xff
    3.11  
    3.12  
    3.13  /******************************************************************************
     4.1 --- a/xen/include/asm-x86/domain.h	Fri Feb 15 09:54:28 2008 +0000
     4.2 +++ b/xen/include/asm-x86/domain.h	Fri Feb 15 12:33:11 2008 +0000
     4.3 @@ -108,10 +108,14 @@ struct shadow_vcpu {
     4.4  #endif
     4.5      /* Non-PAE guests: pointer to guest top-level pagetable */
     4.6      void *guest_vtable;
     4.7 -    /* Last MFN that we emulated a write to. */
     4.8 -    unsigned long last_emulated_mfn;
      4.9 +    /* Last MFN that we emulated a write to, as an unshadow heuristic. */
    4.10 +    unsigned long last_emulated_mfn_for_unshadow;
    4.11      /* MFN of the last shadow that we shot a writeable mapping in */
    4.12      unsigned long last_writeable_pte_smfn;
    4.13 +    /* Last frame number that we emulated a write to. */
    4.14 +    unsigned long last_emulated_frame;
     4.15 +    /* Last MFN that we successfully emulated a write to */
    4.16 +    unsigned long last_emulated_mfn;
    4.17  };
    4.18  
    4.19  /************************************************/
    4.20 @@ -189,6 +193,8 @@ struct paging_vcpu {
    4.21      struct paging_mode *mode;
    4.22      /* HVM guest: last emulate was to a pagetable */
    4.23      unsigned int last_write_was_pt:1;
     4.24 +    /* HVM guest: last write emulation succeeded */
    4.25 +    unsigned int last_write_emul_ok:1;
    4.26      /* Translated guest: virtual TLB */
    4.27      struct shadow_vtlb *vtlb;
    4.28      spinlock_t          vtlb_lock;
     5.1 --- a/xen/include/asm-x86/perfc_defn.h	Fri Feb 15 09:54:28 2008 +0000
     5.2 +++ b/xen/include/asm-x86/perfc_defn.h	Fri Feb 15 12:33:11 2008 +0000
     5.3 @@ -57,6 +57,9 @@ PERFCOUNTER(shadow_fault_emulate_write, 
     5.4  PERFCOUNTER(shadow_fault_emulate_failed, "shadow_fault emulator fails")
     5.5  PERFCOUNTER(shadow_fault_emulate_stack, "shadow_fault emulate stack write")
     5.6  PERFCOUNTER(shadow_fault_emulate_wp, "shadow_fault emulate for CR0.WP=0")
     5.7 +PERFCOUNTER(shadow_fault_fast_emulate, "shadow_fault fast emulate")
     5.8 +PERFCOUNTER(shadow_fault_fast_emulate_fail,
     5.9 +                                   "shadow_fault fast emulate failed")
    5.10  PERFCOUNTER(shadow_fault_mmio,     "shadow_fault handled as mmio")
    5.11  PERFCOUNTER(shadow_fault_fixed,    "shadow_fault fixed fault")
    5.12  PERFCOUNTER(shadow_ptwr_emulate,   "shadow causes ptwr to emulate")